Skip to main content

vellaveto_engine/
coverage.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4//
5// Copyright 2026 Paolo Vella
6// SPDX-License-Identifier: MPL-2.0
7
8//! Policy coverage analysis — identifies dead policies and coverage gaps.
9//!
10//! Phase 72: Analyzes evaluation records against the current policy set to
11//! determine which policies are actively matching, which are dead (never matched),
12//! and which tools have no matching policy.
13
14use std::collections::{HashMap, HashSet};
15use vellaveto_types::Policy;
16
/// Per-policy result of a coverage analysis run.
#[derive(Clone, Debug)]
pub struct PolicyCoverage {
    /// Identifier of the policy this entry describes.
    pub policy_id: String,
    /// Human-readable name of the policy.
    pub policy_name: String,
    /// How many evaluations matched this policy.
    pub match_count: u64,
    /// ISO 8601 timestamp of the most recent match, or `None` if there was none.
    pub last_matched: Option<String>,
    /// Whether the policy is dead, i.e. it has never been matched.
    pub is_dead: bool,
    /// Share of all evaluations that matched this policy, kept within
    /// `[0.0, 1.0]`. It is `0.0` when there were no evaluations at all.
    pub coverage_pct: f64,
}
34
/// A tool that appeared in evaluations without any policy matching it.
#[derive(Clone, Debug)]
pub struct UncoveredTool {
    /// Name of the tool.
    pub tool_name: String,
    /// How many times the tool was seen without a matching policy.
    pub occurrence_count: u64,
    /// ISO 8601 timestamp of the most recent sighting of the tool.
    pub last_seen: Option<String>,
}
45
46/// Full coverage analysis report.
47#[derive(Debug, Clone)]
48pub struct CoverageReport {
49    /// Total number of evaluation records analyzed.
50    pub total_evaluations: u64,
51    /// Total number of policies in the set.
52    pub total_policies: usize,
53    /// Policies that never matched any evaluation record.
54    pub dead_policies: Vec<PolicyCoverage>,
55    /// Policies that matched at least one evaluation record.
56    pub active_policies: Vec<PolicyCoverage>,
57    /// Tools seen in evaluations with no matching policy.
58    pub uncovered_tools: Vec<UncoveredTool>,
59    /// Fraction of policies that are active (matched at least once).
60    /// Clamped to [0.0, 1.0]. Returns 0.0 when total_policies is zero.
61    pub coverage_score: f64,
62    /// Fraction of distinct tools that had a matching policy.
63    /// Clamped to [0.0, 1.0]. Returns 0.0 when no tools were seen.
64    pub tool_coverage_score: f64,
65}
66
/// One recorded policy evaluation, used as input for coverage tracking.
#[derive(Clone, Debug)]
pub struct EvaluationRecord {
    /// Name of the tool that was evaluated.
    pub tool: String,
    /// ID of the policy that matched; `None` when no policy matched.
    pub matched_policy_id: Option<String>,
    /// ISO 8601 timestamp at which the evaluation happened.
    pub timestamp: String,
}
77
78/// Policy coverage analyzer.
79///
80/// Stateless analyzer that takes a snapshot of policies and evaluation
81/// records, producing a coverage report.
82pub struct CoverageAnalyzer;
83
84impl CoverageAnalyzer {
85    /// Analyze policy coverage from evaluation records.
86    ///
87    /// Examines each evaluation record to count matches per policy and
88    /// identify tools with no matching policy. Produces coverage scores
89    /// that are always in [0.0, 1.0] (handles division by zero as 0.0).
90    pub fn analyze(policies: &[Policy], records: &[EvaluationRecord]) -> CoverageReport {
91        let total_evaluations = records.len() as u64;
92        let total_policies = policies.len();
93
94        // Build per-policy match tracking
95        let mut policy_match_count: HashMap<String, u64> = HashMap::new();
96        let mut policy_last_matched: HashMap<String, String> = HashMap::new();
97
98        // Track all tools seen and which had matches
99        let mut tool_occurrences: HashMap<String, u64> = HashMap::new();
100        let mut tool_last_seen: HashMap<String, String> = HashMap::new();
101        let mut covered_tools: HashSet<String> = HashSet::new();
102
103        // Initialize all policies with zero counts
104        for policy in policies {
105            policy_match_count.insert(policy.id.clone(), 0);
106        }
107
108        // Process each evaluation record
109        for record in records {
110            // Track tool occurrence (saturating)
111            let tool_count = tool_occurrences.entry(record.tool.clone()).or_insert(0);
112            *tool_count = tool_count.saturating_add(1);
113            tool_last_seen.insert(record.tool.clone(), record.timestamp.clone());
114
115            if let Some(ref pid) = record.matched_policy_id {
116                // Increment match count for the matched policy (saturating)
117                let count = policy_match_count.entry(pid.clone()).or_insert(0);
118                *count = count.saturating_add(1);
119                policy_last_matched.insert(pid.clone(), record.timestamp.clone());
120
121                // Mark this tool as covered
122                covered_tools.insert(record.tool.clone());
123            }
124        }
125
126        // Build policy coverage entries
127        let mut dead_policies = Vec::new();
128        let mut active_policies = Vec::new();
129
130        for policy in policies {
131            let match_count = policy_match_count.get(&policy.id).copied().unwrap_or(0);
132            let last_matched = policy_last_matched.get(&policy.id).cloned();
133            let is_dead = match_count == 0;
134            let coverage_pct = safe_divide_f64(match_count as f64, total_evaluations as f64);
135
136            let entry = PolicyCoverage {
137                policy_id: policy.id.clone(),
138                policy_name: policy.name.clone(),
139                match_count,
140                last_matched,
141                is_dead,
142                coverage_pct,
143            };
144
145            if is_dead {
146                dead_policies.push(entry);
147            } else {
148                active_policies.push(entry);
149            }
150        }
151
152        // Build uncovered tools list
153        let mut uncovered_tools = Vec::new();
154        for (tool_name, occurrence_count) in &tool_occurrences {
155            if !covered_tools.contains(tool_name) {
156                uncovered_tools.push(UncoveredTool {
157                    tool_name: tool_name.clone(),
158                    occurrence_count: *occurrence_count,
159                    last_seen: tool_last_seen.get(tool_name).cloned(),
160                });
161            }
162        }
163        // Sort uncovered tools by occurrence count descending for deterministic output
164        uncovered_tools.sort_by(|a, b| b.occurrence_count.cmp(&a.occurrence_count));
165
166        // Compute coverage scores
167        let active_count = active_policies.len() as f64;
168        let coverage_score = safe_divide_f64(active_count, total_policies as f64);
169
170        let total_distinct_tools = tool_occurrences.len() as f64;
171        let covered_tool_count = covered_tools.len() as f64;
172        let tool_coverage_score = safe_divide_f64(covered_tool_count, total_distinct_tools);
173
174        CoverageReport {
175            total_evaluations,
176            total_policies,
177            dead_policies,
178            active_policies,
179            uncovered_tools,
180            coverage_score,
181            tool_coverage_score,
182        }
183    }
184}
185
/// Divides `numerator` by `denominator` without ever producing a
/// non-finite value.
///
/// Yields `0.0` when the denominator is zero or when the quotient is NaN or
/// infinite; any other quotient is clamped into `[0.0, 1.0]`.
fn safe_divide_f64(numerator: f64, denominator: f64) -> f64 {
    // Floating-point division never traps, so the quotient can be computed
    // up front and validated afterwards.
    let quotient = numerator / denominator;
    if denominator == 0.0 || !quotient.is_finite() {
        0.0
    } else {
        quotient.clamp(0.0, 1.0)
    }
}
198
#[cfg(test)]
mod tests {
    // Unit tests for `CoverageAnalyzer::analyze` and `safe_divide_f64`.

    use super::*;
    use vellaveto_types::{Policy, PolicyType};

    /// Builds an `Allow` policy with the given id and name and no rules.
    fn make_policy(id: &str, name: &str) -> Policy {
        Policy {
            id: id.to_string(),
            name: name.to_string(),
            policy_type: PolicyType::Allow,
            priority: 0,
            path_rules: None,
            network_rules: None,
        }
    }

    /// Builds an evaluation record for `tool`, optionally matched by `policy_id`.
    fn make_record(tool: &str, policy_id: Option<&str>, ts: &str) -> EvaluationRecord {
        EvaluationRecord {
            tool: tool.to_string(),
            matched_policy_id: policy_id.map(str::to_string),
            timestamp: ts.to_string(),
        }
    }

    /// Asserts that two floats are equal within `f64::EPSILON`.
    ///
    /// Used for every float check so no test relies on exact `==` comparison.
    #[track_caller]
    fn assert_close(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() < f64::EPSILON,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    #[test]
    fn test_coverage_empty_policies_and_records() {
        let report = CoverageAnalyzer::analyze(&[], &[]);
        assert_eq!(report.total_evaluations, 0);
        assert_eq!(report.total_policies, 0);
        assert!(report.dead_policies.is_empty());
        assert!(report.active_policies.is_empty());
        assert!(report.uncovered_tools.is_empty());
        assert_close(report.coverage_score, 0.0);
        assert_close(report.tool_coverage_score, 0.0);
    }

    #[test]
    fn test_coverage_all_dead_policies() {
        let policies = vec![make_policy("p1", "Policy A"), make_policy("p2", "Policy B")];
        let records = vec![make_record("tool_x", None, "2026-01-01T00:00:00Z")];
        let report = CoverageAnalyzer::analyze(&policies, &records);
        assert_eq!(report.dead_policies.len(), 2);
        assert_eq!(report.active_policies.len(), 0);
        assert_close(report.coverage_score, 0.0);
    }

    #[test]
    fn test_coverage_all_active_policies() {
        let policies = vec![make_policy("p1", "Policy A"), make_policy("p2", "Policy B")];
        let records = vec![
            make_record("tool_a", Some("p1"), "2026-01-01T00:00:00Z"),
            make_record("tool_b", Some("p2"), "2026-01-01T01:00:00Z"),
        ];
        let report = CoverageAnalyzer::analyze(&policies, &records);
        assert_eq!(report.dead_policies.len(), 0);
        assert_eq!(report.active_policies.len(), 2);
        assert_close(report.coverage_score, 1.0);
    }

    #[test]
    fn test_coverage_mixed_dead_and_active() {
        let policies = vec![
            make_policy("p1", "Active Policy"),
            make_policy("p2", "Dead Policy"),
            make_policy("p3", "Also Active"),
        ];
        let records = vec![
            make_record("tool_a", Some("p1"), "2026-01-01T00:00:00Z"),
            make_record("tool_b", Some("p3"), "2026-01-01T01:00:00Z"),
        ];
        let report = CoverageAnalyzer::analyze(&policies, &records);
        assert_eq!(report.dead_policies.len(), 1);
        assert_eq!(report.dead_policies[0].policy_id, "p2");
        assert_eq!(report.active_policies.len(), 2);
        // coverage_score = 2/3
        assert!((report.coverage_score - 2.0 / 3.0).abs() < 0.001);
    }

    #[test]
    fn test_coverage_uncovered_tools() {
        let policies = vec![make_policy("p1", "Policy A")];
        let records = vec![
            make_record("covered_tool", Some("p1"), "2026-01-01T00:00:00Z"),
            make_record("uncovered_tool", None, "2026-01-01T01:00:00Z"),
            make_record("uncovered_tool", None, "2026-01-01T02:00:00Z"),
        ];
        let report = CoverageAnalyzer::analyze(&policies, &records);
        assert_eq!(report.uncovered_tools.len(), 1);
        assert_eq!(report.uncovered_tools[0].tool_name, "uncovered_tool");
        assert_eq!(report.uncovered_tools[0].occurrence_count, 2);
    }

    #[test]
    fn test_coverage_tool_coverage_score() {
        let policies = vec![make_policy("p1", "Policy A")];
        let records = vec![
            make_record("tool_a", Some("p1"), "2026-01-01T00:00:00Z"),
            make_record("tool_b", None, "2026-01-01T01:00:00Z"),
        ];
        let report = CoverageAnalyzer::analyze(&policies, &records);
        // 1 covered tool out of 2 distinct tools = 0.5
        assert_close(report.tool_coverage_score, 0.5);
    }

    #[test]
    fn test_coverage_tool_with_mixed_matches_is_covered() {
        // One matched record is enough for a tool to count as covered, even
        // if other records for the same tool had no matching policy.
        let policies = vec![make_policy("p1", "Policy A")];
        let records = vec![
            make_record("tool_a", None, "2026-01-01T00:00:00Z"),
            make_record("tool_a", Some("p1"), "2026-01-01T01:00:00Z"),
        ];
        let report = CoverageAnalyzer::analyze(&policies, &records);
        assert!(report.uncovered_tools.is_empty());
        assert_close(report.tool_coverage_score, 1.0);
    }

    #[test]
    fn test_coverage_match_count_per_policy() {
        let policies = vec![make_policy("p1", "Busy Policy")];
        let records = vec![
            make_record("t1", Some("p1"), "2026-01-01T00:00:00Z"),
            make_record("t2", Some("p1"), "2026-01-01T01:00:00Z"),
            make_record("t3", Some("p1"), "2026-01-01T02:00:00Z"),
        ];
        let report = CoverageAnalyzer::analyze(&policies, &records);
        assert_eq!(report.active_policies.len(), 1);
        assert_eq!(report.active_policies[0].match_count, 3);
        assert!(!report.active_policies[0].is_dead);
    }

    #[test]
    fn test_coverage_last_matched_timestamp() {
        let policies = vec![make_policy("p1", "Policy")];
        let records = vec![
            make_record("t1", Some("p1"), "2026-01-01T00:00:00Z"),
            make_record("t2", Some("p1"), "2026-01-02T00:00:00Z"),
        ];
        let report = CoverageAnalyzer::analyze(&policies, &records);
        // Last matched is the timestamp of the last record (order-dependent)
        assert_eq!(
            report.active_policies[0].last_matched.as_deref(),
            Some("2026-01-02T00:00:00Z")
        );
    }

    #[test]
    fn test_coverage_dead_policy_has_no_last_matched() {
        let policies = vec![make_policy("dead", "Dead Policy")];
        let records = vec![make_record("tool", None, "2026-01-01T00:00:00Z")];
        let report = CoverageAnalyzer::analyze(&policies, &records);
        assert_eq!(report.dead_policies.len(), 1);
        assert!(report.dead_policies[0].last_matched.is_none());
        assert!(report.dead_policies[0].is_dead);
    }

    #[test]
    fn test_coverage_no_records_all_dead() {
        let policies = vec![make_policy("p1", "Policy A"), make_policy("p2", "Policy B")];
        let report = CoverageAnalyzer::analyze(&policies, &[]);
        assert_eq!(report.total_evaluations, 0);
        assert_eq!(report.dead_policies.len(), 2);
        assert_eq!(report.active_policies.len(), 0);
        assert_close(report.coverage_score, 0.0);
    }

    #[test]
    fn test_coverage_pct_calculation() {
        let policies = vec![make_policy("p1", "Policy")];
        let records = vec![
            make_record("t1", Some("p1"), "2026-01-01T00:00:00Z"),
            make_record("t2", None, "2026-01-01T01:00:00Z"),
            make_record("t3", None, "2026-01-01T02:00:00Z"),
            make_record("t4", Some("p1"), "2026-01-01T03:00:00Z"),
        ];
        let report = CoverageAnalyzer::analyze(&policies, &records);
        // p1 matched 2 out of 4 evaluations = 0.5
        assert_close(report.active_policies[0].coverage_pct, 0.5);
    }

    #[test]
    fn test_coverage_uncovered_tool_last_seen() {
        let policies: Vec<Policy> = vec![];
        let records = vec![
            make_record("orphan", None, "2026-01-01T00:00:00Z"),
            make_record("orphan", None, "2026-02-01T00:00:00Z"),
        ];
        let report = CoverageAnalyzer::analyze(&policies, &records);
        assert_eq!(report.uncovered_tools.len(), 1);
        assert_eq!(
            report.uncovered_tools[0].last_seen.as_deref(),
            Some("2026-02-01T00:00:00Z")
        );
    }

    #[test]
    fn test_coverage_uncovered_sorted_by_occurrence() {
        let policies: Vec<Policy> = vec![];
        let records = vec![
            make_record("rare", None, "2026-01-01T00:00:00Z"),
            make_record("common", None, "2026-01-01T01:00:00Z"),
            make_record("common", None, "2026-01-01T02:00:00Z"),
            make_record("common", None, "2026-01-01T03:00:00Z"),
        ];
        let report = CoverageAnalyzer::analyze(&policies, &records);
        assert_eq!(report.uncovered_tools.len(), 2);
        assert_eq!(report.uncovered_tools[0].tool_name, "common");
        assert_eq!(report.uncovered_tools[0].occurrence_count, 3);
        assert_eq!(report.uncovered_tools[1].tool_name, "rare");
        assert_eq!(report.uncovered_tools[1].occurrence_count, 1);
    }

    #[test]
    fn test_coverage_safe_divide_zero_denominator() {
        assert_close(safe_divide_f64(5.0, 0.0), 0.0);
    }

    #[test]
    fn test_coverage_safe_divide_clamps_to_one() {
        // Numerator > denominator should clamp to 1.0
        assert_close(safe_divide_f64(10.0, 5.0), 1.0);
    }

    #[test]
    fn test_coverage_safe_divide_nan() {
        assert_close(safe_divide_f64(f64::NAN, 1.0), 0.0);
    }

    #[test]
    fn test_coverage_safe_divide_infinite_numerator() {
        // An infinite quotient is reported as 0.0 rather than clamped to 1.0
        assert_close(safe_divide_f64(f64::INFINITY, 2.0), 0.0);
    }

    #[test]
    fn test_coverage_safe_divide_negative_clamps_to_zero() {
        assert_close(safe_divide_f64(-1.0, 4.0), 0.0);
    }

    #[test]
    fn test_coverage_policies_with_no_records_for_tool() {
        // Policy exists but no evaluation records at all
        let policies = vec![make_policy("p1", "Lonely Policy")];
        let records: Vec<EvaluationRecord> = vec![];
        let report = CoverageAnalyzer::analyze(&policies, &records);
        assert_eq!(report.dead_policies.len(), 1);
        assert_close(report.dead_policies[0].coverage_pct, 0.0);
        assert!(report.uncovered_tools.is_empty());
        assert_close(report.tool_coverage_score, 0.0);
    }

    #[test]
    fn test_coverage_unknown_policy_id_in_record() {
        // A record references a policy ID not in the policies list
        let policies = vec![make_policy("p1", "Known Policy")];
        let records = vec![make_record(
            "tool",
            Some("p_unknown"),
            "2026-01-01T00:00:00Z",
        )];
        let report = CoverageAnalyzer::analyze(&policies, &records);
        // p1 is dead (never matched); p_unknown is not a known policy, so it
        // gets no report entry
        assert_eq!(report.dead_policies.len(), 1);
        assert_eq!(report.dead_policies[0].policy_id, "p1");
        // tool is covered (had a matched_policy_id), so not in uncovered
        assert!(report.uncovered_tools.is_empty());
    }
}