code_ranker_graph/builtin.rs
1//! The metric catalog, read from `metrics/builtin.toml`: `[categories.*]`,
2//! `[ast.*]` (tier-1 measured), `[fields.*]` (derived, each a `formula_cel`), and
3//! the `[report]` view section (+ `[report.stats]` aggregate formulas).
4//! The crate root re-exports the accessors below; the tier-1 input types
5//! (`MetricInputs` / `FunctionUnit`) come from `code-ranker-plugin-api`.
6//!
7//! Wire encoding:
8//! - the executable `formula_cel` is internal; the emitted [`AttributeSpec`]
9//! carries `formula` (from `formula_pretty`) and `calc` (from `formula_js`);
10//! - `name` / `short` fall back to `label` (a field only spells out what differs);
11//! - `\n` in a description (TOML multiline) is encoded as `<br>` on the wire;
12//! - the `stats` block is produced by [`crate::stats::compute_stats`] over the
//!   keys whose `[report.stats]` entry is a plain mean
//!   (`agg('<k>', 'avg', 'not_empty')`); the richer aggregate formulas are parsed
15//! and available but not yet wired into the built-in stats.
16
17use code_ranker_plugin_api::{
18 PromptTemplate,
19 attrs::ValueType,
20 level::{AttributeGroup, AttributeSpec, CycleKindSpec, Direction},
21};
22use serde::Deserialize;
23use std::collections::BTreeMap;
24use std::sync::LazyLock;
25
/// Raw text of `metrics/builtin.toml`, embedded into the binary at compile time.
/// Deserialized once, on first use, by [`BUILTIN`].
static BUILTIN_TOML: &str = include_str!("../metrics/builtin.toml");
27
/// Raw text of `metrics/prompt.md`, embedded at compile time: the
/// Prompt-Generator scaffolding prose, authored as Markdown (`## <field>`
/// sections) rather than TOML so it reads naturally and edits like the rest of
/// the corpus. [`prompt_template`] turns it into a [`PromptTemplate`].
static PROMPT_MD: &str = include_str!("../metrics/prompt.md");
32
/// One metric entry in `[ast.*]` (measured), `[fields.*]` (derived) or
/// `[coupling.*]` (display spec only). All spec fields are optional; a pure AST
/// input carries only a `description`.
///
/// `deny_unknown_fields` makes a misspelled key a parse error instead of a
/// silently ignored entry.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
struct FieldDef {
    /// Value-type name, decoded by `value_type()`: `int`, `bool`, `str` /
    /// `string`; any other spelling is treated as a float. When the key is
    /// absent the default comes from `crate::registry::default_value_type`.
    #[serde(default = "crate::registry::default_value_type")]
    value_type: String,
    /// Display label. In `[ast.*]` its presence is what marks an entry as an
    /// emitted metric (see `metric_specs`); it is also the fallback for `name`
    /// and `short`.
    label: Option<String>,
    /// Emitted `name`; falls back to `label` when absent.
    name: Option<String>,
    /// Emitted `short` form; falls back to `label` when absent.
    short: Option<String>,
    /// Description text. `\n` is re-encoded as `<br>` when the spec is emitted.
    description: Option<String>,
    /// How to fix a breach — the `fix` line in `check` diagnostics. `\n` is
    /// re-encoded as `<br>` when the spec is emitted.
    remediation: Option<String>,
    /// Executable CEL formula (derived `[fields.*]` only). Internal: it is not
    /// copied into the emitted `AttributeSpec`.
    formula_cel: Option<String>,
    /// Pretty display formula (NOT CEL) — emitted as `AttributeSpec.formula`.
    formula_pretty: Option<String>,
    /// JS the viewer can re-run — emitted as `AttributeSpec.calc`.
    formula_js: Option<String>,
    /// `lower_better` or `higher_better`; anything else (or absent) is decoded
    /// as neutral by `direction()`.
    direction: Option<String>,
    /// Category key — emitted as `AttributeSpec.group`.
    category: Option<String>,
    /// Format large values with K/M suffixes (e.g. `hk`).
    abbreviate: Option<bool>,
    /// Copied unchanged into `AttributeSpec.omit_at`; defaults to
    /// `crate::registry::default_omit_at` when the key is absent.
    /// NOTE(review): presumably the value at which the attribute is left out of
    /// the output — confirm against the consumer of `AttributeSpec`.
    #[serde(default = "crate::registry::default_omit_at")]
    omit_at: f64,
}
59
/// The `[report]` view section of `builtin.toml` — the SAME shape (and key names)
/// the project-side `[report]` override uses, so the vocabulary matches end to
/// end (catalog → `ReportOverride` → `LevelUi` → JSON `ui` → viewer): `columns`,
/// `card`, `size`, `filter` (+ the `default_sort` signed-rank list), and the
/// `[report.stats]` aggregate formulas.
///
/// Every key is optional and defaults to an empty collection; unknown keys are
/// rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Default, Deserialize)]
#[serde(deny_unknown_fields)]
struct ReportView {
    /// Node-table column order (may include non-field tokens like `kind`/`cycle`).
    #[serde(default)]
    columns: Vec<String>,
    /// Signed-rank default sort: list order = priority, a leading `-` on an
    /// entry = descending.
    #[serde(default)]
    default_sort: Vec<String>,
    /// Card-featured metrics (the big numbers on a node's card).
    #[serde(default)]
    card: Vec<String>,
    /// Attribute keys the SVG map offers as circle-size modes (default `sloc`/`hk`).
    #[serde(default)]
    size: Vec<String>,
    /// Attribute keys the SVG map offers as on/off node filters (default `cycle`).
    #[serde(default)]
    filter: Vec<String>,
    /// `output key → graph-scope CEL formula` for the report's `stats` block
    /// (the `[report.stats]` table). Read by `stat_keys` and
    /// `aggregate_formulas`.
    #[serde(default)]
    stats: BTreeMap<String, String>,
}
87
/// The whole of `metrics/builtin.toml`, one field per top-level table. Every
/// table is optional (`#[serde(default)]`). Unlike `FieldDef` / `ReportView`,
/// this struct does not set `deny_unknown_fields`, so an unrecognised top-level
/// table is ignored rather than rejected.
#[derive(Debug, Deserialize)]
struct Builtin {
    /// `[categories.*]`: the attribute groups, keyed by category name.
    #[serde(default)]
    categories: BTreeMap<String, AttributeGroup>,
    /// `[ast.*]`: measured (tier-1) metrics and pure formula inputs.
    #[serde(default)]
    ast: BTreeMap<String, FieldDef>,
    /// `[fields.*]`: derived metrics, each carrying a `formula_cel`.
    #[serde(default)]
    fields: BTreeMap<String, FieldDef>,
    /// Coupling/cycle specs (`fan_in` / `fan_out` / `fan_out_external` / `cycle`):
    /// display specs only — their values are computed post-walk, not by the CEL
    /// engine. (`hk` folds these into a graph-derived `[fields.hk]` formula.)
    #[serde(default)]
    coupling: BTreeMap<String, FieldDef>,
    /// Cycle-kind diagnostic vocab (`mutual` / `chain`): label + why + fix,
    /// overlaid onto each level's cycle_kinds by the orchestrator.
    #[serde(default)]
    cycles: BTreeMap<String, CycleKindSpec>,
    /// `[report]`: view orders plus the `[report.stats]` aggregate formulas.
    #[serde(default)]
    report: ReportView,
}
108
/// The parsed catalog, deserialized from [`BUILTIN_TOML`] on first access and
/// shared by every accessor in this module.
///
/// # Panics
/// The first access panics with "metrics/builtin.toml parses" if the embedded
/// TOML does not deserialize into [`Builtin`]. The file is compiled into the
/// binary, so such a failure is a build-time authoring bug, not a runtime input
/// error.
static BUILTIN: LazyLock<Builtin> =
    LazyLock::new(|| toml::from_str(BUILTIN_TOML).expect("metrics/builtin.toml parses"));
111
/// Computing metric values onto a node — the per-unit derivation engines and the
/// `write_metrics` / `write_derived` entry points, kept in their own file so this
/// module stays the catalog/spec concern. The `mod`/`super` edges are non-flow.
mod write;
// Re-exported so callers reach the two entry points through this module.
pub use write::{write_derived, write_metrics};
117
/// The canonical view orders read from the `[report]` section of
/// `builtin.toml`. `columns` and `card` are flat ordered lists (they may include
/// non-field tokens like `kind`, `cycle` and coupling keys); `default_sort` is
/// the signed-rank list. Built by [`views`].
#[derive(Debug, Clone, Default)]
pub struct Views {
    /// Node-table column order.
    pub columns: Vec<String>,
    /// Signed-rank default sort: list order = priority, leading `-` = descending.
    pub default_sort: Vec<String>,
    /// Card-featured metrics (the big numbers on a node's card).
    pub card: Vec<String>,
    /// Map circle-size modes (attribute keys); built-in default `sloc`/`hk`.
    pub size: Vec<String>,
    /// Map node-filter keys; built-in default `cycle`.
    pub filter: Vec<String>,
}
131
132/// The canonical view orders (table columns + default sort, card metrics, map
133/// size/filter), all from the single `[report]` section of `builtin.toml`.
134pub fn views() -> Views {
135 Views {
136 columns: BUILTIN.report.columns.clone(),
137 default_sort: BUILTIN.report.default_sort.clone(),
138 card: BUILTIN.report.card.clone(),
139 size: BUILTIN.report.size.clone(),
140 filter: BUILTIN.report.filter.clone(),
141 }
142}
143
/// Turn every line feed in `s` into the `<br>` tag the wire/viewer expects, so
/// a TOML multiline description keeps its paragraph breaks.
///
/// Only `\n` is rewritten; every other character (including a `\r`) is passed
/// through untouched.
fn br(s: &str) -> String {
    let pieces: Vec<&str> = s.split('\n').collect();
    pieces.join("<br>")
}
149
150fn value_type(s: &str) -> ValueType {
151 match s {
152 "int" => ValueType::Int,
153 "bool" => ValueType::Bool,
154 "str" | "string" => ValueType::Str,
155 _ => ValueType::Float,
156 }
157}
158
159fn direction(s: Option<&str>) -> Direction {
160 match s {
161 Some("lower_better") => Direction::LowerBetter,
162 Some("higher_better") => Direction::HigherBetter,
163 _ => Direction::Neutral,
164 }
165}
166
167/// Build the emitted [`AttributeSpec`] from a metric entry, applying the
168/// `name`/`short` ← `label` fallback, the `formula_pretty`→`formula` /
169/// `formula_js`→`calc` mapping, and the `\n`→`<br>` description re-encoding.
170fn to_spec(d: &FieldDef) -> AttributeSpec {
171 AttributeSpec {
172 value_type: value_type(&d.value_type),
173 label: d.label.clone(),
174 name: d.name.clone().or_else(|| d.label.clone()),
175 short: d.short.clone().or_else(|| d.label.clone()),
176 description: d.description.as_deref().map(br),
177 remediation: d.remediation.as_deref().map(br),
178 formula: d.formula_pretty.clone(),
179 calc: d.formula_js.clone(),
180 direction: direction(d.direction.as_deref()),
181 abbreviate: d.abbreviate,
182 group: d.category.clone(),
183 thresholds: None,
184 omit_at: d.omit_at,
185 }
186}
187
188/// The metric attribute dictionary + category groups, read from `builtin.toml`.
189/// Includes the emitted measured metrics (`[ast.*]` entries that carry a display
190/// spec, i.e. have a `label`) and every derived `[fields.*]` metric. Pure AST
191/// inputs (no `label`) are excluded — they are formula inputs, not emitted.
192pub fn metric_specs() -> (
193 BTreeMap<String, AttributeSpec>,
194 BTreeMap<String, AttributeGroup>,
195) {
196 let mut specs = BTreeMap::new();
197 for (k, d) in &BUILTIN.ast {
198 if d.label.is_some() {
199 specs.insert(k.clone(), to_spec(d));
200 }
201 }
202 for (k, d) in &BUILTIN.fields {
203 specs.insert(k.clone(), to_spec(d));
204 }
205 (specs, BUILTIN.categories.clone())
206}
207
208/// The coupling/cycle attribute dictionary (`fan_in` / `fan_out` /
209/// `fan_out_external` / `cycle`) + the `coupling` group, read from
210/// `builtin.toml` `[coupling.*]`. The VALUES are computed post-walk by
211/// `annotate_coupling` / `annotate_cycles`; these are the display specs only (incl.
212/// the `description` = `why` and `remediation` = `fix` shown by `check`). The
213/// orchestrator merges them into each level's `node_attributes` / groups. (`hk`'s
214/// spec ships with the derived `[fields.*]` via [`metric_specs`].)
215pub fn coupling_specs() -> (
216 BTreeMap<String, AttributeSpec>,
217 BTreeMap<String, AttributeGroup>,
218) {
219 let specs = BUILTIN
220 .coupling
221 .iter()
222 .map(|(k, d)| (k.clone(), to_spec(d)))
223 .collect();
224 let mut groups = BTreeMap::new();
225 if let Some(g) = BUILTIN.categories.get("coupling") {
226 groups.insert("coupling".to_string(), g.clone());
227 }
228 (specs, groups)
229}
230
231/// The cycle-kind diagnostic vocabulary (`mutual` / `chain`) from `builtin.toml`
232/// `[cycles.*]` — label + `description` (why) + `remediation` (fix). The
233/// orchestrator overlays these onto each level's `cycle_kinds`.
234pub fn cycle_specs() -> BTreeMap<String, CycleKindSpec> {
235 BUILTIN.cycles.clone()
236}
237
238/// The Prompt-Generator scaffolding prose, parsed from `metrics/prompt.md` — the
239/// language-neutral framing carried in the snapshot so the CLI `prompt` format and
240/// the HTML viewer render the same text. Each `## <field>` section maps to a
241/// [`PromptTemplate`] field; `## task` keeps one entry per non-blank line
242/// (verbatim, including the leading `- `), the rest join their body into one line.
243pub fn prompt_template() -> PromptTemplate {
244 parse_prompt(PROMPT_MD)
245}
246
/// Parse caller-supplied prompt-scaffolding Markdown (same `## <field>` shape as
/// the built-in `metrics/prompt.md`) into a [`PromptTemplate`] — the hook a
/// `[templates] prompt = "…"` config override flows through.
///
/// Parsing never fails: sections with an unrecognised name are ignored, and a
/// field whose section is missing keeps its `PromptTemplate::default()` value.
pub fn prompt_template_from(md: &str) -> PromptTemplate {
    parse_prompt(md)
}
253
254/// Parse the `## <field>` sections of `metrics/prompt.md` into a [`PromptTemplate`].
255fn parse_prompt(md: &str) -> PromptTemplate {
256 let mut t = PromptTemplate::default();
257 let mut field = String::new();
258 let mut body: Vec<&str> = Vec::new();
259 let flush = |field: &str, body: &[&str], t: &mut PromptTemplate| {
260 let nonblank = || body.iter().filter(|l| !l.trim().is_empty());
261 match field {
262 "intro" => t.intro = nonblank().cloned().collect::<Vec<_>>().join(" "),
263 "doc_note" => t.doc_note = nonblank().cloned().collect::<Vec<_>>().join(" "),
264 "focus" => t.focus = nonblank().cloned().collect::<Vec<_>>().join(" "),
265 "cycle_note" => t.cycle_note = nonblank().cloned().collect::<Vec<_>>().join(" "),
266 "task" => t.task = nonblank().map(|l| l.trim_end().to_string()).collect(),
267 _ => {}
268 }
269 };
270 for line in md.lines() {
271 if let Some(h) = line.strip_prefix("## ") {
272 flush(&field, &body, &mut t);
273 field = h.trim().to_string();
274 body.clear();
275 } else if !field.is_empty() {
276 body.push(line);
277 }
278 }
279 flush(&field, &body, &mut t);
280 t
281}
282
283/// The metric keys aggregated into the per-graph `stats` block via the mean
284/// (`compute_stats`). Derived from `[report.stats]`: the keys whose
285/// formula is a plain mean of their own metric over `not_empty`
286/// (`agg('<k>', 'avg', 'not_empty')`). The richer aggregate formulas (percentiles,
287/// `all` population, …) are parsed but not yet wired into the built-in stats.
288pub fn stat_keys() -> Vec<String> {
289 BUILTIN
290 .report
291 .stats
292 .iter()
293 .filter(|(k, formula)| **formula == format!("agg('{k}', 'avg', 'not_empty')"))
294 .map(|(k, _)| k.clone())
295 .collect()
296}
297
298/// All `[report.stats]` formulas (`output key → graph-scope CEL`). Parsed and
299/// available for the future graph-scope aggregate engine; not yet driving the
300/// built-in `stats` block (see [`stat_keys`]).
301pub fn aggregate_formulas() -> BTreeMap<String, String> {
302 BUILTIN.report.stats.clone()
303}
304
// Unit tests live in the sibling `builtin_test.rs` and are compiled only for
// test builds.
#[cfg(test)]
#[path = "builtin_test.rs"]
mod tests;