Skip to main content

shape_runtime/
leakage.rs

//! Lookahead bias and data leakage detection.
//!
//! This module provides heuristic detection of common simulation mistakes:
//! - Deriving an event from the current element's data and executing it at the same index
//! - Insufficient warmup periods for stateful functions
//! - Using future information in calculations
//!
//! These are "honest defaults" that warn users about potential simulation flaws.
9
10use serde::{Deserialize, Serialize};
11
12/// Leakage warning severity
13#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
14pub enum LeakageSeverity {
15    /// Informational - might be intentional
16    Info,
17    /// Warning - likely problematic
18    Warning,
19    /// Critical - almost certainly a bug
20    Critical,
21}
22
23/// Types of leakage detected
24#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
25pub enum LeakageType {
26    /// Using current index for event and executing at same index
27    SameStepExecution,
28    /// Function warmup period is insufficient
29    InsufficientWarmup {
30        function: String,
31        required: usize,
32        provided: usize,
33    },
34    /// Using future data in calculation
35    FutureLookup { index: i32 },
36    /// Potential peak into future via improper index
37    SuspiciousIndex { context: String },
38}
39
40/// A single leakage warning
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct LeakageWarning {
43    /// Type of leakage
44    pub leak_type: LeakageType,
45    /// Severity level
46    pub severity: LeakageSeverity,
47    /// Human-readable message
48    pub message: String,
49    /// Location in code (if available)
50    pub location: Option<String>,
51    /// Suggested fix
52    pub suggestion: Option<String>,
53}
54
55impl LeakageWarning {
56    /// Create a same-step execution warning
57    pub fn same_step_execution(location: Option<&str>) -> Self {
58        Self {
59            leak_type: LeakageType::SameStepExecution,
60            severity: LeakageSeverity::Critical,
61            message: "Signal uses current index data and executes at same index. This is look-ahead bias - in live processing you cannot know current value until the step completes.".to_string(),
62            location: location.map(|s| s.to_string()),
63            suggestion: Some("Use execution_delay: 1 or execute at next step".to_string()),
64        }
65    }
66
67    /// Create an insufficient warmup warning
68    pub fn insufficient_warmup(function: &str, required: usize, provided: usize) -> Self {
69        Self {
70            leak_type: LeakageType::InsufficientWarmup {
71                function: function.to_string(),
72                required,
73                provided,
74            },
75            severity: if provided == 0 {
76                LeakageSeverity::Critical
77            } else if provided < required / 2 {
78                LeakageSeverity::Warning
79            } else {
80                LeakageSeverity::Info
81            },
82            message: format!(
83                "Function '{}' requires {} elements to warm up, but only {} elements of warmup provided. Early signals may be unreliable.",
84                function, required, provided
85            ),
86            location: None,
87            suggestion: Some(format!(
88                "Add warmup: {} to simulation config or skip first {} elements",
89                required, required
90            )),
91        }
92    }
93
94    /// Create a future lookup warning
95    pub fn future_lookup(index: i32, location: Option<&str>) -> Self {
96        Self {
97            leak_type: LeakageType::FutureLookup { index },
98            severity: LeakageSeverity::Critical,
99            message: format!(
100                "Accessing future data with positive index [{}]. This data is not available at decision time.",
101                index
102            ),
103            location: location.map(|s| s.to_string()),
104            suggestion: Some("Use negative or zero indices for historical data".to_string()),
105        }
106    }
107}
108
109/// Detector for leakage in a simulation
110#[derive(Debug, Default)]
111pub struct LeakageDetector {
112    pub warnings: Vec<LeakageWarning>,
113}
114
115impl LeakageDetector {
116    pub fn new() -> Self {
117        Self::default()
118    }
119
120    pub fn add_warning(&mut self, warning: LeakageWarning) {
121        self.warnings.push(warning);
122    }
123
124    pub fn report(&self) -> LeakageReport {
125        LeakageReport {
126            warnings: self.warnings.clone(),
127            total_warnings: self.warnings.len(),
128            max_severity: self
129                .warnings
130                .iter()
131                .map(|w| w.severity)
132                .max()
133                .unwrap_or(LeakageSeverity::Info),
134        }
135    }
136
137    pub fn check_row_index(&self, index: i32, context: &str) -> shape_ast::error::Result<()> {
138        if index > 0 {
139            return Err(shape_ast::error::ShapeError::RuntimeError {
140                message: format!(
141                    "Lookahead error: accessing future index {} in {}",
142                    index, context
143                ),
144                location: None,
145            });
146        }
147        Ok(())
148    }
149}
150
151#[derive(Debug, Clone, Serialize, Deserialize)]
152pub struct LeakageReport {
153    pub warnings: Vec<LeakageWarning>,
154    pub total_warnings: usize,
155    pub max_severity: LeakageSeverity,
156}