Skip to main content

kube_cel/validation/
analysis.rs

1//! Static analysis for CEL validation rules.
2//!
3//! Provides compile-time checks beyond syntax validation:
4//! variable scope validation and cost estimation.
5
6use cel::{Program, common::ast::Expr};
7
8use crate::validation::compilation::CompiledSchema;
9
/// Where a CEL rule runs, which determines the variables it may reference.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum ScopeContext {
    /// A CRD `x-kubernetes-validations` rule: `self`, `oldSelf`, and root variables only.
    CrdValidation,
    /// A ValidatingAdmissionPolicy expression: `object`, `oldObject`, `request`,
    /// `params`, and related variables.
    AdmissionPolicy,
}
19
20/// A warning produced by static analysis.
21///
22/// `#[non_exhaustive]`: an output type the crate constructs; new fields may be
23/// added without a breaking change.
24#[derive(Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
25#[non_exhaustive]
26pub struct AnalysisWarning {
27    /// The CEL rule the warning applies to.
28    pub rule: String,
29    /// Human-readable description of the warning.
30    pub message: String,
31    /// Classification of the warning.
32    pub kind: WarningKind,
33}
34
35/// The kind of warning produced by static analysis.
36#[derive(Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
37#[non_exhaustive]
38pub enum WarningKind {
39    /// Variable not available in the given scope.
40    WrongScope,
41    /// Estimated cost may exceed K8s budget.
42    CostExceeded,
43    /// Schema bounds missing (inflates cost estimate).
44    MissingBounds,
45}
46
47fn valid_variables(scope: ScopeContext) -> &'static [&'static str] {
48    match scope {
49        ScopeContext::CrdValidation => &["self", "oldSelf", "apiVersion", "apiGroup", "kind"],
50        ScopeContext::AdmissionPolicy => &[
51            "self",
52            "oldSelf",
53            "object",
54            "oldObject",
55            "request",
56            "params",
57            "namespaceObject",
58            "authorizer",
59            "variables",
60        ],
61    }
62}
63
64/// Check a CEL expression for variable scope violations.
65#[must_use]
66pub fn check_rule_scope(rule: &str, scope: ScopeContext) -> Vec<AnalysisWarning> {
67    let program = match Program::compile(rule) {
68        Ok(p) => p,
69        Err(_) => return vec![],
70    };
71
72    let valid = valid_variables(scope);
73    let mut warnings = Vec::new();
74
75    for var in program.references().variables() {
76        if !valid.contains(&var) {
77            warnings.push(AnalysisWarning {
78                rule: rule.to_string(),
79                message: format!(
80                    "variable '{}' is not available in {:?} context; valid variables: {:?}",
81                    var, scope, valid
82                ),
83                kind: WarningKind::WrongScope,
84            });
85        }
86    }
87
88    warnings
89}
90
/// List size assumed by the cost estimate when the schema gives no `max_items`.
const DEFAULT_MAX_ITEMS: u64 = 1_000;
/// String length assumed by the cost estimate when the schema gives no `max_length`.
const DEFAULT_MAX_LENGTH: u64 = 1_000;
/// Estimated-cost threshold above which a `CostExceeded` warning is emitted.
const K8S_COST_BUDGET: u64 = 1_000_000;
/// Fraction of the assumed string length charged for a string-traversal call.
const STRING_TRAVERSAL_FACTOR: f64 = 0.1;
95
96/// Estimate cost of a CEL rule and warn if it may exceed K8s budget.
97///
98/// This is a coarse heuristic, not an accurate cost model. It catches the most
99/// common issue: unbounded list comprehensions without maxItems.
100#[must_use]
101pub fn estimate_rule_cost(rule: &str, schema: &CompiledSchema) -> Vec<AnalysisWarning> {
102    let program = match Program::compile(rule) {
103        Ok(p) => p,
104        Err(_) => return vec![],
105    };
106
107    let expr = program.expression();
108    let mut warnings = Vec::new();
109    let cost = estimate_expr_cost(&expr.expr, schema);
110
111    if cost > K8S_COST_BUDGET {
112        warnings.push(AnalysisWarning {
113            rule: rule.to_string(),
114            message: format!(
115                "estimated cost {} exceeds K8s budget {}; consider adding maxItems/maxLength to schema bounds",
116                cost, K8S_COST_BUDGET
117            ),
118            kind: WarningKind::CostExceeded,
119        });
120    }
121
122    check_missing_bounds(&expr.expr, schema, rule, &mut warnings);
123    warnings
124}
125
126/// Run all available static analyses on a CEL rule in a single pass.
127///
128/// Compiles the rule once and performs both scope validation and cost estimation.
129/// More efficient than calling [`check_rule_scope`] and [`estimate_rule_cost`] separately.
130#[must_use]
131pub fn analyze_rule(rule: &str, schema: &CompiledSchema, scope: ScopeContext) -> Vec<AnalysisWarning> {
132    let program = match Program::compile(rule) {
133        Ok(p) => p,
134        Err(_) => return vec![],
135    };
136
137    let mut warnings = Vec::new();
138
139    // Scope validation
140    let valid = valid_variables(scope);
141    for var in program.references().variables() {
142        if !valid.contains(&var) {
143            warnings.push(AnalysisWarning {
144                rule: rule.to_string(),
145                message: format!(
146                    "variable '{}' is not available in {:?} context; valid variables: {:?}",
147                    var, scope, valid
148                ),
149                kind: WarningKind::WrongScope,
150            });
151        }
152    }
153
154    // Cost estimation
155    let expr = program.expression();
156    let cost = estimate_expr_cost(&expr.expr, schema);
157    if cost > K8S_COST_BUDGET {
158        warnings.push(AnalysisWarning {
159            rule: rule.to_string(),
160            message: format!(
161                "estimated cost {} exceeds K8s budget {}; consider adding maxItems/maxLength to schema bounds",
162                cost, K8S_COST_BUDGET
163            ),
164            kind: WarningKind::CostExceeded,
165        });
166    }
167    check_missing_bounds(&expr.expr, schema, rule, &mut warnings);
168
169    warnings
170}
171
172fn estimate_expr_cost(expr: &Expr, schema: &CompiledSchema) -> u64 {
173    match expr {
174        Expr::Comprehension(comp) => {
175            let list_size = find_max_items(schema);
176            let body_cost = estimate_expr_cost(&comp.loop_step.expr, schema);
177            list_size * body_cost.max(1)
178        }
179        Expr::Call(call) => {
180            let base = 1u64;
181            let target_cost = call
182                .target
183                .as_ref()
184                .map(|t| estimate_expr_cost(&t.expr, schema))
185                .unwrap_or(0);
186            let arg_cost: u64 = call
187                .args
188                .iter()
189                .map(|a| estimate_expr_cost(&a.expr, schema))
190                .sum();
191            if is_string_traversal(&call.func_name) {
192                let str_len = find_max_length(schema);
193                base + (str_len as f64 * STRING_TRAVERSAL_FACTOR) as u64 + target_cost + arg_cost
194            } else {
195                base + target_cost + arg_cost
196            }
197        }
198        Expr::Select(sel) => 1 + estimate_expr_cost(&sel.operand.expr, schema),
199        Expr::List(list) => list
200            .elements
201            .iter()
202            .map(|e| estimate_expr_cost(&e.expr, schema))
203            .sum::<u64>()
204            .max(1),
205        _ => 1,
206    }
207}
208
209fn find_max_items(schema: &CompiledSchema) -> u64 {
210    if let Some(max) = schema.max_items {
211        return max;
212    }
213    for prop in schema.properties.values() {
214        if prop.items.is_some() {
215            return prop.max_items.unwrap_or(DEFAULT_MAX_ITEMS);
216        }
217    }
218    DEFAULT_MAX_ITEMS
219}
220
221fn find_max_length(schema: &CompiledSchema) -> u64 {
222    if let Some(max) = schema.max_length {
223        return max;
224    }
225    for prop in schema.properties.values() {
226        if let Some(max) = prop.max_length {
227            return max;
228        }
229    }
230    DEFAULT_MAX_LENGTH
231}
232
/// Returns `true` when `func` is one of the function names treated as a string
/// traversal by the cost estimate. The comparison is exact and case-sensitive.
fn is_string_traversal(func: &str) -> bool {
    const TRAVERSAL_FUNCTIONS: [&str; 10] = [
        "contains",
        "startsWith",
        "endsWith",
        "matches",
        "find",
        "findAll",
        "replace",
        "split",
        "indexOf",
        "lastIndexOf",
    ];

    TRAVERSAL_FUNCTIONS.contains(&func)
}
248
249fn check_missing_bounds(
250    expr: &Expr,
251    schema: &CompiledSchema,
252    rule: &str,
253    warnings: &mut Vec<AnalysisWarning>,
254) {
255    if let Expr::Comprehension(_) = expr {
256        for prop in schema.properties.values() {
257            if prop.items.is_some() && prop.max_items.is_none() {
258                warnings.push(AnalysisWarning {
259                    rule: rule.to_string(),
260                    message: "list field has no maxItems bound; cost estimate uses worst-case default".into(),
261                    kind: WarningKind::MissingBounds,
262                });
263                break;
264            }
265        }
266    }
267}
268
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validation::compilation::compile_schema;
    use serde_json::json;

    /// Rule used by the list tests: every element of `self.items` must be non-empty.
    const ALL_ITEMS_RULE: &str = "self.items.all(item, item.size() > 0)";

    /// Object schema whose only property, `items`, is an array of strings without `maxItems`.
    fn unbounded_items_schema() -> serde_json::Value {
        json!({
            "type": "object",
            "properties": {
                "items": {
                    "type": "array",
                    "items": {"type": "string"}
                }
            }
        })
    }

    /// Whether any warning in `warnings` has the given kind.
    fn has_kind(warnings: &[AnalysisWarning], kind: WarningKind) -> bool {
        warnings.iter().any(|w| w.kind == kind)
    }

    // --- scope checks ---

    #[test]
    fn detect_wrong_scope_variable() {
        let warnings = check_rule_scope(
            "request.userInfo.username == 'admin'",
            ScopeContext::CrdValidation,
        );
        let [only] = warnings.as_slice() else {
            panic!("expected exactly one warning, got {}", warnings.len());
        };
        assert!(only.message.contains("request"));
        assert_eq!(only.kind, WarningKind::WrongScope);
    }

    #[test]
    fn self_and_old_self_are_valid() {
        let rule = "self.replicas >= oldSelf.replicas";
        assert!(check_rule_scope(rule, ScopeContext::CrdValidation).is_empty());
    }

    #[test]
    fn admission_policy_scope_allows_request() {
        let rule = "request.userInfo.username == 'admin'";
        assert!(check_rule_scope(rule, ScopeContext::AdmissionPolicy).is_empty());
    }

    #[test]
    fn crd_scope_rejects_object_variable() {
        let rule = "object.metadata.name == 'test'";
        assert_eq!(check_rule_scope(rule, ScopeContext::CrdValidation).len(), 1);
    }

    #[test]
    fn invalid_syntax_returns_empty() {
        assert!(check_rule_scope("self.x >=", ScopeContext::CrdValidation).is_empty());
    }

    // --- cost estimation ---

    #[test]
    fn unbounded_list_comprehension_warns() {
        let compiled = compile_schema(&unbounded_items_schema());
        let warnings = estimate_rule_cost(ALL_ITEMS_RULE, &compiled);
        assert!(
            has_kind(&warnings, WarningKind::CostExceeded)
                || has_kind(&warnings, WarningKind::MissingBounds)
        );
    }

    #[test]
    fn bounded_list_no_cost_warning() {
        let schema = json!({
            "type": "object",
            "properties": {
                "items": {
                    "type": "array",
                    "maxItems": 10,
                    "items": {"type": "string", "maxLength": 64}
                }
            }
        });
        let compiled = compile_schema(&schema);
        let warnings = estimate_rule_cost(ALL_ITEMS_RULE, &compiled);
        // A list capped at 10 items must stay under the cost budget.
        assert!(!has_kind(&warnings, WarningKind::CostExceeded));
    }

    #[test]
    fn simple_comparison_low_cost() {
        let schema = json!({
            "type": "object",
            "properties": {"x": {"type": "integer"}}
        });
        let compiled = compile_schema(&schema);
        assert!(estimate_rule_cost("self.x >= 0", &compiled).is_empty());
    }

    #[test]
    fn missing_bounds_warning() {
        let compiled = compile_schema(&unbounded_items_schema());
        let warnings = estimate_rule_cost(ALL_ITEMS_RULE, &compiled);
        assert!(has_kind(&warnings, WarningKind::MissingBounds));
    }

    // --- combined analysis ---

    #[test]
    fn analyze_rule_catches_scope_issue() {
        let schema = json!({"type": "object", "properties": {"x": {"type": "integer"}}});
        let compiled = compile_schema(&schema);
        let warnings = analyze_rule("request.name == 'test'", &compiled, ScopeContext::CrdValidation);
        assert!(has_kind(&warnings, WarningKind::WrongScope));
    }

    #[test]
    fn analyze_rule_catches_cost_and_bounds() {
        let compiled = compile_schema(&unbounded_items_schema());
        let warnings = analyze_rule(ALL_ITEMS_RULE, &compiled, ScopeContext::CrdValidation);
        // `self` is in scope for CRD validation, so it must not be reported.
        let self_flagged = warnings
            .iter()
            .any(|w| w.kind == WarningKind::WrongScope && w.message.contains("'self'"));
        assert!(!self_flagged);
        // The array has no maxItems, so the missing bound must be reported.
        assert!(has_kind(&warnings, WarningKind::MissingBounds));
    }
}