# code-ranker-graph 3.0.2
# Operations over the Code Ranker property-graph model: cycles, coupling, stats, snapshot.
#
# Built-in metric registry — the data-driven home for every metric.
#
# In dependency order:
#   - `[defaults]`      the fallback values an entry inherits for any field it
#                       leaves out.
#   - `[categories.*]`  the metric groups (each metric references one).
#   - `[ast.*]`         tier-1 metrics measured directly from the AST during the
#                       tree walk — these are the variables a `formula_cel` formula reads.
#                       Every one carries a display spec and is emitted onto the
#                       node; the Halstead/structural base counts (eta1, eta2, n1,
#                       n2, spaces, branches, span_sloc) are kept out of the default
#                       table columns but emitted so the viewer can show each
#                       derived metric's live "formula = numbers" line. Each
#                       language refines their descriptions (the exact operator /
#                       operand tokens it counts) via its `[specs.<key>]`.
#   - `[fields.*]`      the derived metrics: each is a `formula_cel` formula over the AST
#                       inputs (and earlier fields), with its display spec.
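#   - `[coupling.*]`    spec-only entries (no `formula_cel`): their values come
#                       from the graph coupling/cycle pass.
#   - `[cycles.*]`      the diagnostic vocabulary for dependency-cycle kinds.
#   - `[report]` (+ `[report.stats]`)  the views and aggregates that
#                       reference the metric keys above.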
#
# Spec fields on a measured `[ast.<key>]` / derived `[fields.<key>]` entry:
#   - `formula_cel`          (derived only) the executable CEL formula, computed at
#                    snapshot time by the registry engine over the AST inputs.
#   - `formula_pretty`/`formula_js`  display only — `formula_pretty` is the pretty
#                    formula shown in the viewer (NOT CEL); `formula_js` is the JS
#                    the viewer can re-run client-side.
#   - `label` is the base display name; `name` (the full name) and `short` (the
#                    column abbreviation) each default to `label` when omitted, so
#                    an entry only spells out the names that actually differ.
#   - the rest are display metadata (value_type, description, direction,
#                    category, omit_at).
#
# Editing a `formula_cel` formula or spec here changes the metric with no Rust change.
# log2/ln/pow/sqrt/sin are host functions registered by the engine (the exact f64
# ops Rust used). Inter-metric references are auto-ordered (e.g. `mi` after
# `volume`); the `formula_cel` formulas below are verbatim ports of the former Rust
# `derive()`, same operand order and zero gating, so emitted values are unchanged.

# ── field-omission defaults ───────────────────────────────────────────────────
# The fallbacks a metric entry inherits when it doesn't spell out the field —
# applies to the built-in `[ast.*]` / `[fields.*]` below AND to a user's
# `[metrics.<key>]`. The SINGLE source of these values (no literal in Rust).
[defaults]
# Omit floor: a value equal to this carries no signal.
omit_at = 0.0
# Metrics are floats unless an entry says otherwise.
value_type = "float"

# ── categories ────────────────────────────────────────────────────────────────
[categories]
# One sub-table per group; a metric entry names its group via `category`.
complexity.label = "Complexity"
complexity.description = "Code complexity metrics"

halstead.label = "Halstead"
halstead.description = "Halstead software metrics"

loc.label = "Lines of Code"
loc.description = "Lines of code breakdown"

maintainability.label = "Maintainability"
maintainability.description = "Maintainability index"

coupling.label = "Coupling"
coupling.description = "Internal coupling (Henry-Kafura)"

# ── ast (tier-1, measured directly from the AST) ──────────────────────────────
# Halstead base counts — emitted so the derived formulas (length, vocabulary,
# effort, volume, …) can render their live "formula with this node's numbers" line
# in the viewer. They are the operator/operand tallies the Halstead model is built
# from; useful on their own only when reading a formula's derivation.
[ast.eta1]
# η₁: how many different operators the unit uses.
category = "halstead"
value_type = "int"
direction = "lower_better"
label = "η₁"
name = "Unique operators"
description = """\
Distinct operators (η₁) — a Halstead base count feeding \
`vocabulary` / `volume`.\
"""

# η₂: how many different operands the unit uses.
[ast.eta2]
category = "halstead"
value_type = "int"
direction = "lower_better"
label = "η₂"
name = "Unique operands"
description = """\
Distinct operands (η₂) — a Halstead base count feeding \
`vocabulary` / `volume`.\
"""

# N₁: every operator occurrence, repeats included.
[ast.n1]
category = "halstead"
value_type = "int"
direction = "lower_better"
label = "N₁"
name = "Total operators"
description = """\
Total operator occurrences (N₁) — a Halstead base count \
feeding `length`.\
"""

# N₂: every operand occurrence, repeats included.
[ast.n2]
category = "halstead"
value_type = "int"
direction = "lower_better"
label = "N₂"
name = "Total operands"
description = """\
Total operand occurrences (N₂) — a Halstead base count \
feeding `length`.\
"""

# Structural counts — emitted (inputs to cyclomatic / MI).
[ast.spaces]
# One of the two operands of the `cyclomatic` formula (`spaces + branches`).
category = "complexity"
value_type = "int"
direction = "lower_better"
label = "Spaces"
name = "Unit count"
description = """\
Unit count: the source file (1) plus each function / impl / trait / \
closure space. Feeds `cyclomatic`.\
"""

# The other operand of the `cyclomatic` formula (`spaces + branches`).
[ast.branches]
category = "complexity"
value_type = "int"
direction = "lower_better"
label = "Branches"
name = "Decision points"
description = """\
Decision points: if / for / while / loop / match arm / try / && / ||. \
Feeds `cyclomatic`.\
"""

# Size input read by the `mi` and `mi_sei` formulas.
[ast.span_sloc]
category = "maintainability"
value_type = "int"
direction = "lower_better"
label = "Span"
name = "Line span"
description = """\
Line span of the unit (end_row − start_row) — the size input the \
Maintainability Index (`mi` / `mi_sei`) is computed from.\
"""

# Structural counts — emitted measured metrics.
[ast.cognitive]
category = "complexity"
value_type = "int"
direction = "lower_better"
label = "Cognitive"
name = "Cognitive complexity"
# Four paragraphs separated by real newlines; a trailing `\` only folds a long
# source line and adds nothing to the value.
description = """
How hard the code is for a human to follow — not just how many paths it has.
Like `cyclomatic` it adds +1 for each break in linear flow \
(`if`, `else`, `match`, loops, `catch`, chained `&&` / `||`), \
but it also adds an extra +1 for every level of nesting: \
an `if` inside a loop inside an `if` costs far more than three flat `if`s.
That nesting penalty is the point — deeply indented logic is what actually \
strains a reader, so a high `cognitive` next to a modest `cyclomatic` flags \
tangled, hard-to-read code.
Summed across every function in the file."""
remediation = """\
Download and follow the instructions on \
https://github.com/ffedoroff/code-ranker/blob/main/languages/base/Cognitive.md\
"""

# Count of return/throw exit points.
[ast.exits]
category = "complexity"
value_type = "int"
direction = "lower_better"
label = "Exits"
name = "Exit points"
description = "Number of exit points (return/throw) in the unit."

# Count of function / closure arguments.
[ast.args]
category = "complexity"
value_type = "int"
direction = "lower_better"
label = "Args"
name = "Arguments"
description = "Number of function / closure arguments."

# Count of closures; `name` and `short` are omitted, so both fall back to `label`.
[ast.closures]
category = "complexity"
value_type = "int"
direction = "lower_better"
label = "Closures"
description = "Number of closures defined in the unit."

# LOC counts — emitted measured metrics.
[ast.sloc]
# No `direction` key on this entry.
category = "loc"
value_type = "int"
label = "Source"
name = "Source lines"
short = "SLOC"
description = """\
Source lines of code — lines with at least one non-whitespace, non-comment \
character. Blank and comment-only lines are not counted (unlike `loc`, the \
raw file line count).\
"""

# Statement count rather than a physical line count.
[ast.lloc]
category = "loc"
value_type = "int"
label = "Logical"
name = "Logical lines"
description = "Logical lines — counts statements, not physical lines."

# Comment-only line count; also read by the `mi_sei` formula.
[ast.cloc]
category = "loc"
value_type = "int"
label = "Comments"
name = "Comment lines"
description = "Comment-only lines (inline comments on code lines are not counted)."

# Count of empty / whitespace-only lines.
[ast.blank]
category = "loc"
value_type = "int"
label = "Blank"
name = "Blank lines"
description = "Empty or whitespace-only lines."

# Lines belonging to test items, kept apart from the production metrics.
[ast.tloc]
category = "loc"
value_type = "int"
label = "Test"
name = "Test lines"
short = "TLOC"
description = """\
Test lines of code — the lines inside `#[cfg(test)]` / `#[test]` / \
`#[bench]` items (Rust), removed before the production metrics are measured. \
The complement of `sloc`: test code never inflates a file's size, HK, or \
complexity.\
"""

# ── fields (derived: a `formula_cel` formula over the AST inputs) ─────────────────────
[fields.bugs]
category = "halstead"
value_type = "float"
direction = "lower_better"
label = "Bugs"
name = "Halstead bugs"
short = "H.bugs"
description = "Estimated delivered bugs — a rough predictor of defect density."
# The CEL form is gated on `eta2 > 0.0` and yields 0.0 otherwise; the pretty
# and JS forms carry no gate.
formula_cel = "eta2 > 0.0 ? pow(effort, 2.0 / 3.0) / 3000.0 : 0.0"
formula_pretty = "effort^⅔ ÷ 3000"
formula_js = "effort ** (2/3) / 3000"

# Sum of the two measured structural counts, `spaces` and `branches`.
[fields.cyclomatic]
category = "complexity"
value_type = "int"
direction = "lower_better"
label = "Cyclomatic"
name = "Cyclomatic complexity"
# Four paragraphs separated by real newlines; a trailing `\` only folds a long
# source line and adds nothing to the value.
description = """
Number of independent paths through the code — roughly the minimum number \
of test cases needed to cover every branch.
A function starts at 1 and gains +1 per decision point: each `if` / `else if`, \
every `match` / `switch` arm, every loop, and each `&&` / `||` in a condition.
Summed across every function in the file, so it grows with both size and \
branching — the file's total branching burden.
Counts paths only, ignoring how deeply they nest. \
For a readability-weighted view see `cognitive`."""
formula_cel = "spaces + branches"
formula_pretty = "spaces + branches"
formula_js = "spaces + branches"
# Overrides the 0.0 omit floor from `[defaults]`.
omit_at = 1.0
remediation = """\
Download and follow the instructions on \
https://github.com/ffedoroff/code-ranker/blob/main/languages/base/Cyclomatic.md\
"""

# Reads the derived `volume` as well as the base counts `eta1`, `eta2`, `n2`.
[fields.effort]
category = "halstead"
value_type = "float"
direction = "lower_better"
label = "Effort"
name = "Halstead effort"
short = "H.effort"
description = "Mental effort to implement the algorithm."
# The CEL form is gated on `eta2 > 0.0` (its divisor) and yields 0.0 otherwise.
formula_cel = "eta2 > 0.0 ? (eta1 / 2.0) * (n2 / eta2) * volume : 0.0"
formula_pretty = "(eta1 ÷ 2) × (n2 ÷ eta2) × volume"
formula_js = "(eta1 / 2) * (n2 / eta2) * volume"

# Sum of the two total-occurrence counts, `n1` and `n2`.
[fields.length]
category = "halstead"
value_type = "float"
direction = "lower_better"
label = "Length"
name = "Halstead length"
short = "H.len"
description = "Program length — total operator + operand occurrences."
formula_cel = "n1 + n2"
formula_pretty = "n1 + n2"
formula_js = "n1 + n2"

# Maintainability Index, classic form: evaluated from `volume`, `cyclomatic`
# and the `span_sloc` line span. The formula applies no clamp or rescale, so
# the description must not promise a 0–100 range, and its size input is the
# line span rather than `sloc`.
[fields.mi]
value_type = "float"
label = "MI"
name = "Maintainability index"
description = "Maintainability Index (higher is more maintainable; the classic unnormalised scale, not clamped to 0–100). Derived from Halstead volume, cyclomatic complexity, and the unit's line span (`span_sloc`)."
# `ln` is the natural logarithm (`Math.log` in the JS form).
formula_cel = "171.0 - 5.2 * ln(volume) - 0.23 * cyclomatic - 16.2 * ln(span_sloc)"
formula_pretty = "171 − 5.2·ln(volume) − 0.23·cyclomatic − 16.2·ln(span_sloc)"
formula_js = "171 - 5.2*Math.log(volume) - 0.23*cyclomatic - 16.2*Math.log(span_sloc)"
direction = "higher_better"
category = "maintainability"

# Same shape as `mi`, but with base-2 logs and an added comment-density term
# built from `cloc / span_sloc`.
[fields.mi_sei]
category = "maintainability"
value_type = "float"
direction = "higher_better"
label = "MI (SEI)"
name = "Maintainability (SEI)"
short = "MI SEI"
description = "SEI variant of the Maintainability Index — adds a bonus for comment density."
formula_cel = "171.0 - 5.2 * log2(volume) - 0.23 * cyclomatic - 16.2 * log2(span_sloc) + 50.0 * sin(sqrt(cloc / span_sloc * 2.4))"
formula_pretty = "171 − 5.2·log₂(volume) − 0.23·cyclomatic − 16.2·log₂(span_sloc) + 50·sin(√(cloc ÷ span_sloc × 2.4))"
formula_js = "171 - 5.2*Math.log2(volume) - 0.23*cyclomatic - 16.2*Math.log2(span_sloc) + 50*Math.sin(Math.sqrt(cloc / span_sloc * 2.4))"

# `effort` divided by the constant 18; the unit (seconds) is spelled out in
# `name` and `short`.
[fields.time]
category = "halstead"
value_type = "float"
direction = "lower_better"
label = "Time"
name = "Halstead time, s"
short = "H.time(s)"
description = "Estimated implementation time, in seconds."
formula_cel = "effort / 18.0"
formula_pretty = "effort ÷ 18"
formula_js = "effort / 18"

# Sum of the two distinct counts, `eta1` and `eta2`.
[fields.vocabulary]
category = "halstead"
value_type = "float"
direction = "lower_better"
label = "Vocabulary"
name = "Halstead vocabulary"
short = "H.vocab"
description = "Vocabulary — distinct operators + operands."
formula_cel = "eta1 + eta2"
formula_pretty = "eta1 + eta2"
formula_js = "eta1 + eta2"

# Built from the derived `length` and `vocabulary` fields.
[fields.volume]
category = "halstead"
value_type = "float"
direction = "lower_better"
label = "Volume"
name = "Halstead volume"
short = "H.vol"
description = "Algorithm size in bits, from distinct operators and operands."
# The CEL form is gated on `vocabulary > 0.0` and yields 0.0 otherwise.
formula_cel = "vocabulary > 0.0 ? length * log2(vocabulary) : 0.0"
formula_pretty = "length × log₂(vocabulary)"
formula_js = "length * Math.log2(vocabulary)"

# Henry–Kafura: a graph-derived field — its `formula_cel` reads the coupling
# counts (`fan_in`/`fan_out`) the graph pass writes, so it is evaluated AFTER that
# pass (see `builtin::write_derived`), not in the per-file tier-2 step. A file with
# no `sloc` (no analysed source lines) gets no `hk`. (`fan_in`/`fan_out`/`sloc` are
# integers, so `pow(·, 2.0)` is exact and the result is rounded to 3 significant
# figures like every metric.)
[fields.hk]
value_type = "float"
# `short` is left out on purpose: it defaults to `label`, which is already "HK".
label = "HK"
name = "Henry–Kafura"
description = "Henry-Kafura information-flow complexity: a module that is both a busy crossroads (high fan-in × fan-out) and large — the most expensive place in the codebase to change."
# Reads `sloc` plus the `fan_in` / `fan_out` coupling counts.
formula_cel = "sloc * pow(fan_in * fan_out, 2.0)"
formula_pretty = "sloc × (fan_in × fan_out)²"
formula_js = "sloc * (fan_in * fan_out) ** 2"
direction = "lower_better"
category = "coupling"
abbreviate = true
remediation = "Download and follow the instructions on https://github.com/ffedoroff/code-ranker/blob/main/languages/base/HK.md"

# ── coupling (computed post-walk by annotate_coupling / annotate_cycles) ───────
# Spec-only entries: the VALUES are derived by the graph crate's coupling/cycle
# pass over the flow edges, so these carry NO `formula_cel`. `fan_in` / `fan_out`
# are neutral (a high count is dual — broad reuse vs. bottleneck). `description` is
# the `why` and `remediation` the `fix` shown by `check` (data, not Rust). The
# size-folding `hk` is a graph-derived `[fields.hk]` above (it has a `formula_cel`).
[coupling.fan_in]
# No `direction` and no `formula_cel` on this entry.
category = "coupling"
value_type = "int"
label = "Fan-in"
description = """\
Many other units depend on this one, making it risky to change and a single \
point of failure — though some hubs (shared types) carry high fan-in \
legitimately.\
"""
remediation = """\
Download and follow the instructions on \
https://github.com/ffedoroff/code-ranker/blob/main/languages/base/Fan-in.md\
"""

# Outgoing-dependency count; no `direction` and no `formula_cel` on this entry.
[coupling.fan_out]
category = "coupling"
value_type = "int"
label = "Fan-out"
description = """\
This unit depends on many others, so it breaks when any of them change \
and is hard to test in isolation.\
"""
remediation = """\
Download and follow the instructions on \
https://github.com/ffedoroff/code-ranker/blob/main/languages/base/Fan-out.md\
"""

# Count of distinct external libraries; this entry carries no `remediation`.
[coupling.fan_out_external]
category = "coupling"
value_type = "int"
label = "Fan-out (external)"
description = "Number of distinct external libraries this node depends on."

# The only string-valued entry in this group, and the only one with no `category`.
[coupling.cycle]
value_type = "str"
short = "Cycle"
label = "Cycle"
description = "Cycle kind this node participates in."

# ── cycle kinds (computed by annotate_cycles) ─────────────────────────────────
# Diagnostic vocab for dependency-cycle kinds — `description` is the `why` and
# `remediation` the `fix` shown by `check`; the orchestrator overlays these onto
# each level's `cycle_kinds`. Language-agnostic, like coupling.
[cycles.mutual]
label = "Mutual"
# The "why" text for a two-unit cycle.
description = """\
Two units import each other (A ↔ B), so neither can be built, tested, \
or understood in isolation — the tightest possible coupling.\
"""
# The "fix" text; points at the same guide as `cycles.chain`.
remediation = """\
Download and follow the instructions on \
https://github.com/ffedoroff/code-ranker/blob/main/languages/base/ADP.md\
"""

# Cycle of three or more units.
[cycles.chain]
label = "Chain"
# The "why" text.
description = """\
Three or more units form a strongly-connected component (A → B → C → A); \
the whole component must be loaded and changed together, \
defeating modular boundaries.\
"""
# The "fix" text; points at the same guide as `cycles.mutual`.
remediation = """\
Download and follow the instructions on \
https://github.com/ffedoroff/code-ranker/blob/main/languages/base/ADP.md\
"""

# ── prompt scaffolding ────────────────────────────────────────────────────────
# The Prompt-Generator framing prose moved OUT of this file into `metrics/prompt.md`
# (authored as Markdown `## <field>` sections, parsed by `builtin.rs::prompt_template`).
# Edit it there.

# ── report views ──────────────────────────────────────────────────────────────
# ONE section, with the SAME key names a project's `[report]` override uses and
# the same names that reach the JSON `ui` block and the viewer — so the vocabulary
# matches end to end (catalog → ReportOverride → LevelUi → ui → JS). Each list is
# pruned by the orchestrator to the keys actually present on an internal node.
#   columns       node-table column order, left to right (one flat list; may
#                 include non-field tokens `kind`/`cycle` and coupling keys).
#   default_sort  initial sort, most-significant first; leading `-` = descending.
#                 Every column stays sortable in the UI — this only sets the open order.
#   card          ordered metrics shown as the big numbers on a node's card.
#   size          attribute keys the SVG map offers as circle-size modes.
#   filter        attribute keys the SVG map offers as on/off node filters (keep
#                 only nodes where the metric has signal).
# A language/project extends any list via its own `[report]` (e.g.
# `size = { add = [...] }`).
[report]
# Node-table columns, left to right.
columns = [
    "kind",
    "cycle",
    "sloc",
    "hk",
    "fan_in",
    "fan_out",
    "volume",
    "bugs",
    "effort",
    "time",
    "length",
    "vocabulary",
    "cyclomatic",
    "cognitive",
    "mi",
    "mi_sei",
    "lloc",
    "cloc",
    "blank",
    "tloc",
]
# Opening sort, most-significant key first; a leading `-` means descending.
default_sort = ["-cycle", "-hk", "-sloc"]
# Big numbers on a node's card, in order.
card = ["hk", "sloc"]
# Circle-size modes offered by the SVG map.
size = ["sloc", "hk"]
# On/off node filters offered by the SVG map.
filter = ["cycle"]

# ── aggregates (the `stats` block of the JSON report) ─────────────────────────
# Each key here becomes one entry in the report's per-graph aggregate map; its
# value is a graph-scope CEL formula, evaluated once over the whole node set.
# These can be any aggregate, not just means. The `agg(metric, reducer,
# population)` host function reduces a metric's value population to a scalar:
#   - reducer:    avg / sum / min / max / count / median / p<q>  (e.g. p50, p90, p99)
#   - population: `not_empty` — only nodes whose value carries signal (≠ the
#                 metric's omit floor); `all` — every internal node, with missing
#                 values counted at the floor.
# The output KEY is free-form; the convention is `<metric>[_<population>]_<reducer>`
# (population left out ⇒ not_empty), but only the formula decides what is computed.
#
# The first block reproduces today's aggregates: the per-file mean of each tracked
# metric with zero/floor values excluded (= avg over `not_empty`).
[report.stats]
# Baseline block: one mean per tracked metric, taken over the `not_empty`
# population (nodes whose value differs from the metric's omit floor).
blank = "agg('blank', 'avg', 'not_empty')"
bugs = "agg('bugs', 'avg', 'not_empty')"
cloc = "agg('cloc', 'avg', 'not_empty')"
cognitive = "agg('cognitive', 'avg', 'not_empty')"
cyclomatic = "agg('cyclomatic', 'avg', 'not_empty')"
effort = "agg('effort', 'avg', 'not_empty')"
fan_in = "agg('fan_in', 'avg', 'not_empty')"
fan_out = "agg('fan_out', 'avg', 'not_empty')"
hk = "agg('hk', 'avg', 'not_empty')"
length = "agg('length', 'avg', 'not_empty')"
mi = "agg('mi', 'avg', 'not_empty')"
mi_sei = "agg('mi_sei', 'avg', 'not_empty')"
sloc = "agg('sloc', 'avg', 'not_empty')"
time = "agg('time', 'avg', 'not_empty')"
tloc = "agg('tloc', 'avg', 'not_empty')"
vocabulary = "agg('vocabulary', 'avg', 'not_empty')"
volume = "agg('volume', 'avg', 'not_empty')"

# Examples of the richer aggregates the same mechanism allows — each adds a NEW
# key to the aggregate block. With `all` the empty/floor nodes are counted in
# (a median and a mean here):
fan_in_all_p50 = "agg('fan_in', 'p50', 'all')"
sloc_all_avg = "agg('sloc', 'avg', 'all')"
# …and a couple over `not_empty` (the population left out of the key name):
# a maximum and a 99th percentile.
sloc_max = "agg('sloc', 'max', 'not_empty')"
hk_p99 = "agg('hk', 'p99', 'not_empty')"