Skip to main content

code_ranker_graph/
checks.rs

1//! Custom config-defined checks (`[rules.checks.<id>]`) — a **config-only linter
2//! primitive**.
3//!
4//! A [`CheckDef`] pairs a CEL boolean `when` predicate with a diagnostic
5//! `message`. The predicate is evaluated **per node** over everything the node
6//! carries: its numeric / boolean / string attributes (`tloc`, `unsafe`,
7//! `cyclomatic`, …) **plus** derived path strings (`path`, `name`, `stem`,
8//! `ext`, `dir`). String predicates use CEL's own stdlib (`contains`,
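//! `startsWith`, `endsWith`, `matches` regex, `size`, `double`, …); on top of it
//! we register the shared math host functions (`pow` / `log2` / `sqrt` / …), the
//! project-wide `agg(metric, reducer, population)` aggregate, and the graph-aware
//! functions (`depends_on` / `depended_on_by` / `file_exists`), and bind the
//! `deps` / `rdeps` / `files` / `siblings` lists when the predicate references
//! them. When the predicate is `true`, the check fires and produces a
//! [`CheckHit`] the CLI turns into a violation.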
13//!
14//! This is what lets a project express a custom linter — e.g. "no inline tests
15//! in a production file" (`tloc > 0 && !path.endsWith("_tests.rs")`) — entirely
16//! in `code-ranker.toml`, with no Rust change. It complements
17//! `[rules.thresholds.file]` (which only does `metric > limit`) with an arbitrary
18//! boolean expression and a path-aware, string-aware context.
19
20mod text;
21
22use crate::level_graph::LevelGraph;
23use crate::nodepath::{node_path, split_path};
24use cel::{Context, Program, Value};
25use code_ranker_plugin_api::{attrs::AttrValue, node::EXTERNAL, node::Node};
26use serde::Deserialize;
27use std::collections::{BTreeMap, HashMap, HashSet};
28use std::sync::Arc;
29use text::{references, render_message, replace_word};
30
/// Concern-group label applied to a check whose definition leaves `group`
/// unset.
const DEFAULT_GROUP: &'static str = "LNT";
33
34/// One custom check from `[rules.checks.<id>]`.
35#[derive(Debug, Clone, Deserialize)]
36#[serde(deny_unknown_fields)]
37pub struct CheckDef {
38    /// CEL boolean predicate over the node's values. `true` → a violation.
39    pub when: String,
40    /// Diagnostic message. `{key}` placeholders are filled from the node's
41    /// values at evaluation time (any attribute, or a derived path field
42    /// `path`/`name`/`stem`/`ext`/`dir`). An unknown `{key}` is left verbatim.
43    pub message: String,
44    /// Concern-group label shown / grouped in diagnostics (free-form, e.g.
45    /// `"TST"`). Defaults to [`DEFAULT_GROUP`].
46    #[serde(default)]
47    pub group: Option<String>,
48    /// Optional diagnostic copy — the `why` / `fix` lines in `check` output.
49    #[serde(default)]
50    pub why: Option<String>,
51    #[serde(default)]
52    pub fix: Option<String>,
53    /// Optional title (SARIF `shortDescription`). Defaults to the check id.
54    #[serde(default)]
55    pub title: Option<String>,
56}
57
58/// A compiled check: its id, the parsed predicate program, its definition, and
59/// which graph collections the predicate references (so eval binds only those).
60pub struct CompiledCheck {
61    pub id: String,
62    pub def: CheckDef,
63    program: Program,
64    uses: Uses,
65}
66
/// Flags recording which graph-derived list variables a predicate mentions.
/// Binding the project file list costs O(files), so each list is bound only
/// when the predicate actually refers to it. All flags default to `false`.
#[derive(Default, Clone, Copy)]
struct Uses {
    /// The predicate mentions `deps`.
    deps: bool,
    /// The predicate mentions `rdeps`.
    rdeps: bool,
    /// The predicate mentions `files`.
    files: bool,
    /// The predicate mentions `siblings`.
    siblings: bool,
}
76
/// The error produced when a check's `when` predicate cannot be compiled. It is
/// reported up-front so the gate fails loudly rather than silently skipping a
/// misspelled check.
#[derive(Debug, Clone)]
pub struct CheckCompileError {
    /// Id of the check whose predicate was rejected.
    pub id: String,
    /// Human-readable reason the predicate was rejected.
    pub message: String,
}

impl std::fmt::Display for CheckCompileError {
    /// Renders as ``check `<id>`: invalid `when` predicate: <message>``.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { id, message } = self;
        write!(f, "check `{id}`: invalid `when` predicate: {message}")
    }
}

impl std::error::Error for CheckCompileError {}
96
/// The outcome of a check that fired on a node: everything needed to build a
/// violation from it.
#[derive(Debug, Clone)]
pub struct CheckHit {
    /// Id of the check that fired.
    pub id: String,
    /// The diagnostic message for this node.
    pub message: String,
    /// Concern-group label of the check.
    pub group: String,
    /// Optional `why` line of the diagnostic copy.
    pub why: Option<String>,
    /// Optional `fix` line of the diagnostic copy.
    pub fix: Option<String>,
    /// Optional title of the check.
    pub title: Option<String>,
}
107
108/// Compile one check's `when` predicate. Named helpers from `[rules.defs]` are
109/// expanded into the predicate first (see [`expand_defs`]), so a check can reuse
110/// a shared vocabulary (`is_domain`, `is_test_file`, …).
111pub fn compile(
112    id: &str,
113    def: &CheckDef,
114    defs: &BTreeMap<String, String>,
115) -> Result<CompiledCheck, CheckCompileError> {
116    let when = expand_defs(id, &def.when, defs)?;
117    let program = Program::compile(&when).map_err(|e| CheckCompileError {
118        id: id.to_string(),
119        message: e.to_string(),
120    })?;
121    let uses = Uses {
122        deps: references(&when, "deps"),
123        rdeps: references(&when, "rdeps"),
124        files: references(&when, "files"),
125        siblings: references(&when, "siblings"),
126    };
127    Ok(CompiledCheck {
128        id: id.to_string(),
129        def: def.clone(),
130        program,
131        uses,
132    })
133}
134
135impl CompiledCheck {
136    /// Evaluate the predicate over `node`, with `graph` giving access to the
137    /// fully-built level (edges + the file set) for dependency / collection
138    /// predicates. Returns a [`CheckHit`] when it fires (`when` evaluates to
139    /// `true`). A predicate that errors or yields a non-boolean value does
140    /// **not** fire — a check never panics on a node.
141    pub fn eval(&self, node: &Node, graph: &GraphView) -> Option<CheckHit> {
142        // `Context::default()` already provides the CEL string stdlib
143        // (`contains` / `startsWith` / `endsWith` / `matches` regex / `size` /
144        // `double` / …). On top we add the same math host functions the metric
145        // engine uses (`pow` / `log2` / `sqrt` / …) and the graph-aware functions,
146        // so a predicate can do real arithmetic over node values.
147        let mut ctx = Context::default();
148        crate::registry::register_math(&mut ctx);
149        // `agg(metric, reducer, population)` over the whole project, so a predicate
150        // can use a relative threshold (this node vs the project distribution).
151        // Memoized across nodes (see `GraphView::register_agg`).
152        graph.register_agg(&mut ctx);
153        register_graph_fns(&mut ctx, graph, &node.id);
154        bind_node(&mut ctx, node);
155        self.bind_collections(&mut ctx, node, graph);
156        match self.program.execute(&ctx) {
157            Ok(Value::Bool(true)) => Some(CheckHit {
158                id: self.id.clone(),
159                message: render_message(&self.def.message, node),
160                group: self
161                    .def
162                    .group
163                    .clone()
164                    .unwrap_or_else(|| DEFAULT_GROUP.to_string()),
165                // `{key}` placeholders are interpolated in the copy too, so a
166                // per-file fix reads "move into `handler_tests.rs`", not `{stem}`.
167                why: self.def.why.as_deref().map(|s| render_message(s, node)),
168                fix: self.def.fix.as_deref().map(|s| render_message(s, node)),
169                title: self.def.title.clone(),
170            }),
171            _ => None,
172        }
173    }
174
175    /// Bind the graph-derived list variables the predicate actually references:
176    /// `deps` / `rdeps` (out / in dependency neighbours of this node, by label),
177    /// `files` (every project file path), `siblings` (files in the same folder).
178    /// Each is a CEL list, usable with the comprehension macros (`.exists`,
179    /// `.all`, `.filter`, `.size()`).
180    fn bind_collections(&self, ctx: &mut Context, node: &Node, graph: &GraphView) {
181        if self.uses.deps {
182            let _ = ctx.add_variable("deps", graph.deps(&node.id));
183        }
184        if self.uses.rdeps {
185            let _ = ctx.add_variable("rdeps", graph.rdeps(&node.id));
186        }
187        if self.uses.files {
188            let _ = ctx.add_variable("files", graph.files_vec());
189        }
190        if self.uses.siblings {
191            let _ = ctx.add_variable("siblings", graph.siblings(&node_path(node)));
192        }
193    }
194}
195
196/// Bind a node's values into the CEL context: every attribute under its own key
197/// (numeric / boolean / string), plus the derived path fields.
198fn bind_node(ctx: &mut Context, node: &Node) {
199    for (key, value) in node.attrs.iter() {
200        match value {
201            AttrValue::Int(i) => {
202                let _ = ctx.add_variable(key.as_str(), *i);
203            }
204            AttrValue::Float(f) => {
205                let _ = ctx.add_variable(key.as_str(), *f);
206            }
207            AttrValue::Bool(b) => {
208                let _ = ctx.add_variable(key.as_str(), *b);
209            }
210            AttrValue::Str(s) => {
211                let _ = ctx.add_variable(key.as_str(), s.clone());
212            }
213        }
214    }
215    // Derived path fields. `path` may already be bound from the attr loop above;
216    // re-binding it here is harmless (same value) and covers nodes that carry the
217    // path only in their id.
218    let path = node_path(node);
219    let parts = split_path(&path);
220    let _ = ctx.add_variable("path", path);
221    let _ = ctx.add_variable("name", parts.name);
222    let _ = ctx.add_variable("stem", parts.stem);
223    let _ = ctx.add_variable("ext", parts.ext);
224    let _ = ctx.add_variable("dir", parts.dir);
225}
226
227/// Register the graph-aware predicate functions for `node_id`, bound to the
228/// fully-built level: `depends_on(s)` / `depended_on_by(s)` (does this node have
229/// an out- / in-dependency whose label contains `s` — e.g. `"ext:sqlx"` or
230/// `"/infrastructure/"`), and `file_exists(p)` (is `p` a file in the project).
231fn register_graph_fns(ctx: &mut Context, graph: &GraphView, node_id: &str) {
232    let out = graph.deps(node_id);
233    ctx.add_function("depends_on", move |s: Arc<String>| -> bool {
234        out.iter().any(|d| d.contains(s.as_str()))
235    });
236    let inc = graph.rdeps(node_id);
237    ctx.add_function("depended_on_by", move |s: Arc<String>| -> bool {
238        inc.iter().any(|d| d.contains(s.as_str()))
239    });
240    let files = graph.files_set_arc();
241    ctx.add_function("file_exists", move |p: Arc<String>| -> bool {
242        files.contains(p.as_str())
243    });
244}
245
246/// A read-only view of the fully-built level, prepared once per `check` run and
247/// shared across every node's predicate. Holds the dependency adjacency (by
248/// node id → neighbour *labels*) and the project's file set / per-folder index.
249/// A node's **label** is its repo-relative `path` attribute when present, else
250/// its id (so an external crate stays `ext:<name>`).
251#[derive(Default)]
252pub struct GraphView {
253    out: HashMap<String, Vec<String>>,
254    inc: HashMap<String, Vec<String>>,
255    files: Arc<Vec<String>>,
256    files_set: Arc<HashSet<String>>,
257    by_dir: HashMap<String, Vec<String>>,
258    /// Value populations over all internal nodes, so a predicate can compare a
259    /// node against the project distribution via `agg(metric, reducer, pop)` —
260    /// e.g. a relative threshold `cyclomatic.double() > agg('cyclomatic','p90','not_empty')`.
261    pops: Arc<crate::registry::Populations>,
262    /// Memoized `agg(key, reducer, population)` results. The value is identical
263    /// for every node in a run (the population is the whole project), so each
264    /// distinct call is reduced (sorted) once, not once per file.
265    agg_cache: AggCache,
266}
267
/// Shared, lock-protected memo table: `(metric, reducer, population)` maps to
/// the reduced scalar.
type AggCache = Arc<std::sync::Mutex<HashMap<(String, String, String), f64>>>;
270
271impl GraphView {
272    /// Build the view from a fully-enriched level (nodes carry their `path`
273    /// attribute and the edges are final).
274    pub fn build(level: &LevelGraph) -> Self {
275        let mut label: HashMap<String, String> = HashMap::new();
276        let mut files: Vec<String> = Vec::new();
277        let mut by_dir: HashMap<String, Vec<String>> = HashMap::new();
278        let mut rows: Vec<BTreeMap<String, f64>> = Vec::new();
279        let mut metric_keys: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
280        for n in &level.nodes {
281            let l = label_of(n);
282            label.insert(n.id.clone(), l.clone());
283            if n.kind != EXTERNAL {
284                files.push(l.clone());
285                by_dir.entry(split_path(&l).dir).or_default().push(l);
286                let row = numeric_attrs(n);
287                metric_keys.extend(row.keys().cloned());
288                rows.push(row);
289            }
290        }
291        sort_dedup(&mut files);
292        for v in by_dir.values_mut() {
293            sort_dedup(v);
294        }
295        let files_set: HashSet<String> = files.iter().cloned().collect();
296
297        // Aggregate populations over internal nodes, using each metric's declared
298        // `omit_at` floor (from the level specs) so `not_empty` matches the metric
299        // engine's semantics.
300        let keys: Vec<String> = metric_keys.into_iter().collect();
301        let omit_at: BTreeMap<String, f64> = keys
302            .iter()
303            .map(|k| {
304                let floor = level
305                    .node_attributes
306                    .get(k)
307                    .map(|s| s.omit_at)
308                    .unwrap_or(0.0);
309                (k.clone(), floor)
310            })
311            .collect();
312        let pops = crate::registry::Populations::build(&rows, &keys, &omit_at);
313
314        let mut out: HashMap<String, Vec<String>> = HashMap::new();
315        let mut inc: HashMap<String, Vec<String>> = HashMap::new();
316        let resolve = |id: &str| label.get(id).cloned().unwrap_or_else(|| id.to_string());
317        for e in &level.edges {
318            out.entry(e.source.clone())
319                .or_default()
320                .push(resolve(&e.target));
321            inc.entry(e.target.clone())
322                .or_default()
323                .push(resolve(&e.source));
324        }
325        for v in out.values_mut() {
326            sort_dedup(v);
327        }
328        for v in inc.values_mut() {
329            sort_dedup(v);
330        }
331
332        GraphView {
333            out,
334            inc,
335            files: Arc::new(files),
336            files_set: Arc::new(files_set),
337            by_dir,
338            pops: Arc::new(pops),
339            agg_cache: Arc::new(std::sync::Mutex::new(HashMap::new())),
340        }
341    }
342
343    /// Register the memoizing `agg(key, reducer, population)` host function on
344    /// `ctx`, sharing this view's populations + cache.
345    fn register_agg(&self, ctx: &mut Context) {
346        let pops = self.pops.clone();
347        let cache = self.agg_cache.clone();
348        ctx.add_function(
349            "agg",
350            move |key: Arc<String>, reducer: Arc<String>, population: Arc<String>| -> f64 {
351                let k = (
352                    key.as_str().to_string(),
353                    reducer.as_str().to_string(),
354                    population.as_str().to_string(),
355                );
356                if let Some(v) = cache.lock().unwrap().get(&k) {
357                    return *v;
358                }
359                let v = pops.reduce_for(&key, &reducer, &population);
360                cache.lock().unwrap().insert(k, v);
361                v
362            },
363        );
364    }
365
366    fn deps(&self, id: &str) -> Vec<String> {
367        self.out.get(id).cloned().unwrap_or_default()
368    }
369
370    fn rdeps(&self, id: &str) -> Vec<String> {
371        self.inc.get(id).cloned().unwrap_or_default()
372    }
373
374    fn files_vec(&self) -> Vec<String> {
375        (*self.files).clone()
376    }
377
378    fn files_set_arc(&self) -> Arc<HashSet<String>> {
379        self.files_set.clone()
380    }
381
382    /// Files in the same folder as `path`, excluding `path` itself.
383    fn siblings(&self, path: &str) -> Vec<String> {
384        let dir = split_path(path).dir;
385        self.by_dir
386            .get(&dir)
387            .map(|v| v.iter().filter(|f| f.as_str() != path).cloned().collect())
388            .unwrap_or_default()
389    }
390}
391
392/// A node's numeric attributes as a name→f64 map (for the aggregate populations).
393fn numeric_attrs(node: &Node) -> BTreeMap<String, f64> {
394    let mut m = BTreeMap::new();
395    for (k, v) in node.attrs.iter() {
396        match v {
397            AttrValue::Int(i) => {
398                m.insert(k.clone(), *i as f64);
399            }
400            AttrValue::Float(f) => {
401                m.insert(k.clone(), *f);
402            }
403            _ => {}
404        }
405    }
406    m
407}
408
/// Sort `v` ascending and drop duplicate entries, in place.
fn sort_dedup(v: &mut Vec<String>) {
    // Equal strings are indistinguishable, so an unstable sort yields the
    // same sequence as a stable one.
    v.sort_unstable();
    v.dedup();
}
413
414/// A node's label for dependency matching. An **external** crate keeps its
415/// `ext:<name>` id (its `path` attribute is the crate's cargo-registry location,
416/// useless for a `depends_on("ext:sqlx")` predicate). An internal file uses the
417/// same repo-relative string [`node_path`] resolves, so the adjacency / folder
418/// index and a node's own `path`/`dir` always agree.
419fn label_of(node: &Node) -> String {
420    if node.kind == EXTERNAL || node.id.starts_with("ext:") {
421        return node.id.clone();
422    }
423    node_path(node)
424}
425
426/// Expand `[rules.defs]` named helpers into `expr` by whole-word substitution,
427/// to a fixpoint (a helper may reference earlier helpers). Each helper body is
428/// wrapped in parentheses so it composes with surrounding operators. A helper
429/// set that never settles (a reference cycle) is a compile error rather than an
430/// infinite loop.
431fn expand_defs(
432    id: &str,
433    expr: &str,
434    defs: &BTreeMap<String, String>,
435) -> Result<String, CheckCompileError> {
436    let mut out = expr.to_string();
437    // A non-cyclic set settles within `defs.len()` passes; one extra pass detects
438    // a cycle (the (defs.len()+1)-th pass would still be changing the string).
439    for _ in 0..=defs.len() {
440        let mut changed = false;
441        for (name, body) in defs {
442            if references(&out, name) {
443                out = replace_word(&out, name, &format!("({body})"));
444                changed = true;
445            }
446        }
447        if !changed {
448            return Ok(out);
449        }
450    }
451    Err(CheckCompileError {
452        id: id.to_string(),
453        message: "`[rules.defs]` helpers reference each other in a cycle".to_string(),
454    })
455}
456
457#[cfg(test)]
458#[path = "checks_test.rs"]
459mod tests;