Skip to main content

code_ranker_graph/registry/
eval.rs

1//! Pure numeric / formula evaluation helpers for the metric [`super::Engine`].
2//!
3//! Behavior-preserving extraction of the registry's stateless building blocks:
4//! formula compilation ordering ([`topo_order`] + [`references`]), program
5//! execution coercion ([`exec_f64`]), the host-function registrars
6//! ([`register_math`] / [`register_agg`]), and the population reducers
7//! ([`reduce`] / [`percentile`]). Kept in a sibling module so the parent file
8//! stays focused on the [`super::Engine`] / [`super::Populations`] data types.
9
10use super::model::{MetricDef, RegistryError};
11use cel::{Context, Program, Value};
12use std::collections::BTreeMap;
13
14/// Register `agg(key, reducer, population)` bound to this graph's populations.
15/// `reducer` is `sum`/`avg`/`mean`/`min`/`max`/`count`/`median`/`p<q>`;
16/// `population` is `all`/`not_empty`. An empty population or unknown reducer
17/// yields `NaN` (→ the metric is omitted), never a panic.
18pub(crate) fn register_agg(ctx: &mut Context, pops: std::sync::Arc<Populations>) {
19    use std::sync::Arc;
20    ctx.add_function(
21        "agg",
22        move |key: Arc<String>, reducer: Arc<String>, population: Arc<String>| -> f64 {
23            pops.reduce_for(&key, &reducer, &population)
24        },
25    );
26}
27
28/// Reduce a value population to a scalar. Empty population or unknown reducer →
29/// `None` (→ omit). Percentiles use the numpy-default linear interpolation (R-7).
30pub(crate) fn reduce(vals: &[f64], reducer: &str) -> Option<f64> {
31    if vals.is_empty() {
32        return None;
33    }
34    match reducer {
35        "sum" => Some(vals.iter().sum()),
36        "avg" | "mean" => Some(vals.iter().sum::<f64>() / vals.len() as f64),
37        "min" => Some(vals.iter().copied().fold(f64::INFINITY, f64::min)),
38        "max" => Some(vals.iter().copied().fold(f64::NEG_INFINITY, f64::max)),
39        "count" => Some(vals.len() as f64),
40        "median" => percentile(vals, 50.0),
41        // `top<N>` / `top<N>_<reducer>`: keep the N largest values, then apply the
42        // base reducer (default `avg`). E.g. `top10_avg`, `top5_sum`, `top10_max`.
43        _ if reducer
44            .strip_prefix("top")
45            .and_then(|rest| rest.split('_').next())
46            .is_some_and(|n| !n.is_empty() && n.bytes().all(|b| b.is_ascii_digit())) =>
47        {
48            let rest = reducer.strip_prefix("top").unwrap();
49            let (num, base) = match rest.split_once('_') {
50                Some((n, b)) => (n, b),
51                None => (rest, "avg"),
52            };
53            let n: usize = num.parse().ok()?;
54            let mut s = vals.to_vec();
55            s.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal)); // desc
56            s.truncate(n);
57            reduce(&s, base)
58        }
59        r if r.starts_with('p') => r[1..].parse::<f64>().ok().and_then(|q| percentile(vals, q)),
60        _ => None,
61    }
62}
63
64/// The `q`-th percentile (0–100) by linear interpolation between closest ranks —
65/// `numpy.percentile`'s default `method="linear"` (Hyndman–Fan type 7).
66pub(crate) fn percentile(vals: &[f64], q: f64) -> Option<f64> {
67    if vals.is_empty() {
68        return None;
69    }
70    let mut s = vals.to_vec();
71    s.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
72    let n = s.len();
73    if n == 1 {
74        return Some(s[0]);
75    }
76    let h = (n as f64 - 1.0) * (q / 100.0);
77    let lo = h.floor();
78    let lo_i = lo as usize;
79    let frac = h - lo;
80    let v = if lo_i + 1 < n {
81        s[lo_i] + frac * (s[lo_i + 1] - s[lo_i])
82    } else {
83        s[lo_i]
84    };
85    Some(v)
86}
87
88/// Execute a program against a context and coerce a finite numeric result to
89/// `f64`. Any error / non-numeric / non-finite result → `None` (→ omit).
90pub(crate) fn exec_f64(program: &Program, ctx: &Context) -> Option<f64> {
91    match program.execute(ctx) {
92        Ok(Value::Float(v)) if v.is_finite() => Some(v),
93        Ok(Value::Int(v)) => Some(v as f64),
94        Ok(Value::UInt(v)) => Some(v as f64),
95        // Non-finite floats and non-numeric results carry no signal.
96        _ => None,
97    }
98}
99
100/// Register the math host functions CEL itself lacks (each the exact `f64` op the
101/// Rust engines use, so a transcribed formula is bit-identical). Shared by the
102/// metric engine and the `[rules.checks]` predicate context (see
103/// [`crate::checks`]) so both speak the same arithmetic.
104pub(crate) fn register_math(ctx: &mut Context) {
105    ctx.add_function("log2", |x: f64| x.log2());
106    ctx.add_function("ln", |x: f64| x.ln());
107    ctx.add_function("log10", |x: f64| x.log10());
108    ctx.add_function("pow", |x: f64, y: f64| x.powf(y));
109    ctx.add_function("sqrt", |x: f64| x.sqrt());
110    ctx.add_function("sin", |x: f64| x.sin());
111    ctx.add_function("cos", |x: f64| x.cos());
112    ctx.add_function("abs", |x: f64| x.abs());
113    ctx.add_function("min2", |x: f64, y: f64| x.min(y));
114    ctx.add_function("max2", |x: f64, y: f64| x.max(y));
115}
116
117/// Whole-word membership: does `formula` reference identifier `key`? Mirrors the
118/// viewer's `\bkey\b` scan — snake_case keys are safe (`mi` won't hit `mi_sei`).
119pub(crate) fn references(formula: &str, key: &str) -> bool {
120    let bytes = formula.as_bytes();
121    let kb = key.as_bytes();
122    let is_word = |c: u8| c.is_ascii_alphanumeric() || c == b'_';
123    let mut i = 0;
124    while let Some(pos) = formula[i..].find(key) {
125        let start = i + pos;
126        let end = start + kb.len();
127        let before_ok = start == 0 || !is_word(bytes[start - 1]);
128        let after_ok = end == bytes.len() || !is_word(bytes[end]);
129        if before_ok && after_ok {
130            return true;
131        }
132        i = start + 1;
133    }
134    false
135}
136
137/// Kahn topological sort over the metric dependency graph (edge `dep → key` when
138/// `key`'s formula references `dep`). Returns evaluation order or a cycle error.
139pub(crate) fn topo_order(
140    defs: &[(&String, &MetricDef)],
141    keys: &[String],
142) -> Result<Vec<String>, RegistryError> {
143    let keyset: std::collections::BTreeSet<&str> = keys.iter().map(|s| s.as_str()).collect();
144    // deps[key] = set of other registry keys it references.
145    let mut deps: BTreeMap<String, std::collections::BTreeSet<String>> = BTreeMap::new();
146    let mut indeg: BTreeMap<String, usize> = BTreeMap::new();
147    for k in keys {
148        deps.entry(k.clone()).or_default();
149        indeg.entry(k.clone()).or_insert(0);
150    }
151    for (key, def) in defs {
152        for cand in &keyset {
153            if *cand != key.as_str()
154                && references(&def.formula_cel, cand)
155                && deps.get_mut(*key).unwrap().insert((*cand).to_string())
156            {
157                *indeg.get_mut(*key).unwrap() += 1;
158            }
159        }
160    }
161    // Kahn, draining zero-indegree keys (BTree iteration = stable order).
162    let mut order = Vec::with_capacity(keys.len());
163    loop {
164        let ready: Vec<String> = indeg
165            .iter()
166            .filter(|&(_, &d)| d == 0)
167            .map(|(k, _)| k.clone())
168            .collect();
169        if ready.is_empty() {
170            break;
171        }
172        for k in ready {
173            indeg.remove(&k);
174            for (other, od) in deps.iter() {
175                if od.contains(&k)
176                    && let Some(d) = indeg.get_mut(other)
177                {
178                    *d -= 1;
179                }
180            }
181            order.push(k);
182        }
183    }
184    if order.len() != keys.len() {
185        let mut remaining: Vec<String> = indeg.keys().cloned().collect();
186        remaining.sort();
187        return Err(RegistryError::Cycle { keys: remaining });
188    }
189    Ok(order)
190}
191
192/// The value populations an aggregate reduces over, per metric key. Two flavours
193/// per the metric's no-signal value (`omit_at`):
194/// - `not_empty` — only nodes whose value carries signal (≠ `omit_at`);
195/// - `all` — every applicable (internal) node, missing values counted at the
196///   floor (`omit_at`), so e.g. files with zero coupling honestly weigh in.
197///
198/// Lives beside [`reduce`]/[`register_agg`] (its reduce target) so the trio stays
199/// in one module rather than splitting a mutual dependency across files.
200#[derive(Debug, Clone, Default)]
201pub struct Populations {
202    not_empty: BTreeMap<String, Vec<f64>>,
203    all: BTreeMap<String, Vec<f64>>,
204}
205
206impl Populations {
207    /// Reduce one metric's population to a scalar, as the `agg(key, reducer,
208    /// population)` host function does. Unknown key / empty population / unknown
209    /// reducer → `NaN`. Pure (no interior state), so a caller may memoize the
210    /// result — it is identical for every node in a run.
211    pub(crate) fn reduce_for(&self, key: &str, reducer: &str, population: &str) -> f64 {
212        let table = match population {
213            "all" => &self.all,
214            _ => &self.not_empty,
215        };
216        table
217            .get(key)
218            .and_then(|vals| reduce(vals, reducer))
219            .unwrap_or(f64::NAN)
220    }
221
222    /// Build both populations from each applicable node's present numeric
223    /// attributes (`rows`), the set of metric `keys`, and each key's `omit_at`.
224    /// Aggregation is over true computed values: a node missing a key contributes
225    /// the floor to `all` (and nothing to `not_empty`).
226    pub fn build(
227        rows: &[BTreeMap<String, f64>],
228        keys: &[String],
229        omit_at: &BTreeMap<String, f64>,
230    ) -> Populations {
231        let mut not_empty = BTreeMap::new();
232        let mut all = BTreeMap::new();
233        for key in keys {
234            let omit = omit_at.get(key).copied().unwrap_or(0.0);
235            let present: Vec<f64> = rows.iter().filter_map(|r| r.get(key).copied()).collect();
236            let ne: Vec<f64> = present.iter().copied().filter(|v| *v != omit).collect();
237            let missing = rows.len().saturating_sub(present.len());
238            let mut a = present;
239            a.extend(std::iter::repeat_n(omit, missing));
240            not_empty.insert(key.clone(), ne);
241            all.insert(key.clone(), a);
242        }
243        Populations { not_empty, all }
244    }
245}