Skip to main content

perl_dap_eval/
validator.rs

1//! Expression safety validation
2//!
3//! This module provides the core validation logic for detecting dangerous
4//! operations in Perl expressions during debug evaluation.
5
6use crate::patterns::{ASSIGNMENT_OPERATORS, DANGEROUS_OPS_RE, REGEX_MUTATION_RE};
7
8/// Error type for unsafe expression detection
9#[derive(Debug, Clone, thiserror::Error)]
10pub enum ValidationError {
11    /// Expression contains a dangerous operation
12    #[error(
13        "Safe evaluation mode: potentially mutating operation '{0}' not allowed (use allowSideEffects: true)"
14    )]
15    DangerousOperation(String),
16
17    /// Expression contains an assignment operator
18    #[error(
19        "Safe evaluation mode: assignment operator '{0}' not allowed (use allowSideEffects: true)"
20    )]
21    AssignmentOperator(String),
22
23    /// Expression contains increment/decrement operators
24    #[error(
25        "Safe evaluation mode: increment/decrement operators not allowed (use allowSideEffects: true)"
26    )]
27    IncrementDecrement,
28
29    /// Expression contains backticks (shell execution)
30    #[error(
31        "Safe evaluation mode: backticks (shell execution) not allowed (use allowSideEffects: true)"
32    )]
33    Backticks,
34
35    /// Expression contains a regex mutation operator (s///, tr///, y///)
36    #[error(
37        "Safe evaluation mode: regex mutation operator '{0}' not allowed (use allowSideEffects: true)"
38    )]
39    RegexMutation(String),
40
41    /// Expression contains newlines (potential command injection)
42    #[error("Expression cannot contain newlines")]
43    ContainsNewlines,
44}
45
46/// Result type for expression validation
47pub type ValidationResult = Result<(), ValidationError>;
48
/// Validator that decides whether a Perl expression may be evaluated in
/// safe (side-effect-free) debug mode.
///
/// The type currently holds no state; it exists so that configuration can be
/// attached later without changing call sites.
#[derive(Debug, Clone, Default)]
pub struct SafeEvaluator {
    // Intentionally empty for now; reserved for future configuration options.
}
57
58impl SafeEvaluator {
59    /// Create a new safe evaluator
60    pub fn new() -> Self {
61        Self::default()
62    }
63
64    /// Validate that an expression is safe for evaluation
65    ///
66    /// # Arguments
67    ///
68    /// * `expression` - The Perl expression to validate
69    ///
70    /// # Returns
71    ///
72    /// `Ok(())` if the expression is safe, or an error describing why it's unsafe.
73    pub fn validate(&self, expression: &str) -> ValidationResult {
74        // Check for newlines (command injection vector)
75        if expression.contains('\n') || expression.contains('\r') {
76            return Err(ValidationError::ContainsNewlines);
77        }
78
79        // Check for backticks (shell execution)
80        if expression.contains('`') {
81            return Err(ValidationError::Backticks);
82        }
83
84        // Check for assignment operators
85        for op in ASSIGNMENT_OPERATORS {
86            if expression.contains(op) {
87                return Err(ValidationError::AssignmentOperator(op.to_string()));
88            }
89        }
90
91        // Check for increment/decrement operators
92        if expression.contains("++") || expression.contains("--") {
93            return Err(ValidationError::IncrementDecrement);
94        }
95
96        // Check for dangerous operations using regex
97        self.check_dangerous_operations(expression)?;
98
99        // Check for regex mutation operators
100        self.check_regex_mutation(expression)?;
101
102        Ok(())
103    }
104
105    /// Check for dangerous operations in the expression
106    fn check_dangerous_operations(&self, expression: &str) -> ValidationResult {
107        let Some(re) = DANGEROUS_OPS_RE.as_ref().ok() else {
108            // If regex failed to compile, be conservative and allow
109            return Ok(());
110        };
111
112        for mat in re.find_iter(expression) {
113            let op = mat.as_str();
114            let start = mat.start();
115            let end = mat.end();
116
117            // Allow harmless occurrences in single-quoted literals
118            if is_in_single_quotes(expression, start) {
119                continue;
120            }
121
122            // Allow sigil-prefixed identifiers ($print, @say, %exit, *printf)
123            if is_sigil_prefixed_identifier(expression, start) {
124                continue;
125            }
126
127            // Allow ${print} (simple scalar braced variable form)
128            if is_simple_braced_scalar_var(expression, start, end) {
129                continue;
130            }
131
132            // Allow package-qualified names unless it's CORE::
133            if is_package_qualified_not_core(expression, start) {
134                continue;
135            }
136
137            // Block: either bare op or CORE:: qualified
138            return Err(ValidationError::DangerousOperation(op.to_string()));
139        }
140
141        Ok(())
142    }
143
144    /// Check for regex mutation operators (s///, tr///, y///)
145    fn check_regex_mutation(&self, expression: &str) -> ValidationResult {
146        let Some(re) = REGEX_MUTATION_RE.as_ref().ok() else {
147            return Ok(());
148        };
149
150        if let Some(mat) = re.find(expression) {
151            let op = mat.as_str();
152            let start = mat.start();
153
154            // Allow sigil-prefixed identifiers ($s, $tr, $y)
155            if is_sigil_prefixed_identifier(expression, start) {
156                return Ok(());
157            }
158
159            // Allow escape sequences like \s, \y
160            if is_escape_sequence(expression, start) {
161                return Ok(());
162            }
163
164            return Err(ValidationError::RegexMutation(op.trim().to_string()));
165        }
166
167        Ok(())
168    }
169}
170
/// Check whether byte offset `idx` of `s` lies inside a single-quoted literal.
///
/// The text before `idx` is scanned left to right while tracking which quote
/// character (`'` or `"`) is currently open. Inside either kind of quote a
/// backslash escapes the following character, so `\'` does not close a
/// single-quoted literal. Double quotes are tracked so that an apostrophe
/// inside a double-quoted string (`"it's"`) is not mistaken for the start of
/// a single-quoted literal, which would wrongly mark everything after it as
/// literal text.
///
/// Returns `true` only when the quote still open at `idx` is a single quote.
fn is_in_single_quotes(s: &str, idx: usize) -> bool {
    // The quote character currently open, if any.
    let mut open: Option<char> = None;
    // True when the previous character inside a quote was an unescaped backslash.
    let mut escaped = false;

    for (_, ch) in s.char_indices().take_while(|&(i, _)| i < idx) {
        match open {
            None => {
                if ch == '\'' || ch == '"' {
                    open = Some(ch);
                }
            }
            Some(_) if escaped => escaped = false,
            Some(_) if ch == '\\' => escaped = true,
            // Only the matching quote character closes the literal.
            Some(quote) if ch == quote => open = None,
            Some(_) => {}
        }
    }

    open == Some('\'')
}
195
/// Report whether the text immediately before `op_start` qualifies the match
/// with `CORE::` (meaning the match IS the dangerous builtin).
///
/// Two shapes count: a direct `CORE::` prefix, or `CORE::GLOBAL::`. A
/// `GLOBAL::` prefix that is not itself preceded by `CORE::` does not count.
fn is_core_qualified(s: &str, op_start: usize) -> bool {
    let before = &s.as_bytes()[..op_start];

    match before.strip_suffix(b"GLOBAL::") {
        // `...GLOBAL::op` is only core-qualified when written `CORE::GLOBAL::op`.
        Some(outer) => outer.ends_with(b"CORE::"),
        // Otherwise look for a plain `CORE::op`.
        None => before.ends_with(b"CORE::"),
    }
}
208
/// Decide whether the match starting at `op_start` is just the name part of a
/// sigil-prefixed identifier (`$print`, `@say`, `%exit`, `*dump`).
///
/// The byte directly before the match must be one of `$`, `@`, `%`, `*`.
/// Even then the match is NOT treated as a plain identifier when the sigil is
/// used to run code: `&$sub`, `->$method`, or `&{ $sub }` (ASCII whitespace
/// between those pieces is ignored).
///
/// NOTE(review): `*` and `%` are also binary operators in Perl, so an
/// expression such as `1 *system(...)` appears to be accepted here - confirm
/// whether that needs tightening.
fn is_sigil_prefixed_identifier(s: &str, op_start: usize) -> bool {
    /// Drop trailing ASCII whitespace from a byte slice.
    fn trim_trailing_ws(b: &[u8]) -> &[u8] {
        let keep = b
            .iter()
            .rposition(|c| !c.is_ascii_whitespace())
            .map_or(0, |pos| pos + 1);
        &b[..keep]
    }

    // Split the text before the match into the candidate sigil and what precedes it.
    let Some((&sigil, before_sigil)) = s.as_bytes()[..op_start].split_last() else {
        return false;
    };
    if !matches!(sigil, b'$' | b'@' | b'%' | b'*') {
        return false;
    }

    // Security: inspect what comes before the sigil for code-execution forms.
    match trim_trailing_ws(before_sigil) {
        // `&$sub` is a code dereference (dangerous).
        [.., b'&'] => false,
        // `->$method` is a method call (potentially dangerous).
        [.., b'-', b'>'] => false,
        // `&{ $sub }` is the braced form of a code dereference.
        [outer @ .., b'{'] => !matches!(trim_trailing_ws(outer), [.., b'&']),
        _ => true,
    }
}
254
/// Decide whether the match spanning `op_start..op_end` is the name inside a
/// simple braced scalar variable such as `${print}`.
///
/// To the left the match must be preceded by `$` then `{`, and to the right
/// it must be followed by `}`; ASCII whitespace is tolerated between each of
/// those pieces.
fn is_simple_braced_scalar_var(s: &str, op_start: usize, op_end: usize) -> bool {
    /// Drop trailing ASCII whitespace from a byte slice.
    fn trim_trailing_ws(b: &[u8]) -> &[u8] {
        let keep = b
            .iter()
            .rposition(|c| !c.is_ascii_whitespace())
            .map_or(0, |pos| pos + 1);
        &b[..keep]
    }

    let bytes = s.as_bytes();

    // Left side: `$`, optional whitespace, `{`, optional whitespace, then the match.
    let has_dollar_brace = trim_trailing_ws(&bytes[..op_start])
        .strip_suffix(b"{")
        .map(trim_trailing_ws)
        .map_or(false, |rest| rest.ends_with(b"$"));
    if !has_dollar_brace {
        return false;
    }

    // Right side: the first non-whitespace byte after the match must be `}`.
    let next_significant = bytes
        .get(op_end..)
        .and_then(|tail| tail.iter().find(|c| !c.is_ascii_whitespace()));
    next_significant == Some(&b'}')
}
282
283/// Check if the match is package-qualified (Foo::print) but not CORE::
284fn is_package_qualified_not_core(s: &str, op_start: usize) -> bool {
285    let bytes = s.as_bytes();
286    if op_start < 2 || bytes[op_start - 1] != b':' || bytes[op_start - 2] != b':' {
287        return false;
288    }
289    // It's qualified, but we need to check it's not CORE::
290    !is_core_qualified(s, op_start)
291}
292
/// Decide whether the match at `match_start` is the letter of a backslash
/// escape (for example the `s` in `\s`), i.e. the byte directly before it is
/// a backslash. A match at the very start of the string is never an escape.
fn is_escape_sequence(s: &str, match_start: usize) -> bool {
    s.as_bytes()[..match_start].last() == Some(&b'\\')
}
300
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that every expression in `cases` passes validation.
    fn assert_all_accepted(cases: &[&str]) {
        let evaluator = SafeEvaluator::new();
        for expr in cases {
            assert!(
                evaluator.validate(expr).is_ok(),
                "expected {expr:?} to be accepted"
            );
        }
    }

    /// Assert that every expression in `cases` fails validation.
    fn assert_all_rejected(cases: &[&str]) {
        let evaluator = SafeEvaluator::new();
        for expr in cases {
            assert!(
                evaluator.validate(expr).is_err(),
                "expected {expr:?} to be rejected"
            );
        }
    }

    #[test]
    fn test_safe_expressions() {
        assert_all_accepted(&[
            // Simple arithmetic and lookups
            "$x + $y",
            "$hash{key}",
            "$array[0]",
            "length($str)",
            // Package-qualified (not CORE)
            "Foo::print",
            "My::Module::system",
        ]);
    }

    #[test]
    fn test_dangerous_operations() {
        assert_all_rejected(&[
            // Code execution
            "eval('code')",
            "system('ls')",
            "exec('/bin/sh')",
            // I/O
            "print 'hello'",
            "open(FH, '<', 'file')",
        ]);
    }

    #[test]
    fn test_sigil_prefixed_identifiers() {
        // Variable names that merely look like operations must pass.
        assert_all_accepted(&["$print", "@say", "%exit", "$system_name"]);
    }

    #[test]
    fn test_braced_variables() {
        // ${print} is a variable, not a call to print.
        assert_all_accepted(&["${print}"]);
    }

    #[test]
    fn test_assignment_operators() {
        assert_all_rejected(&["$x = 1", "$x += 1", "$x .= 'str'"]);
    }

    #[test]
    fn test_increment_decrement() {
        assert_all_rejected(&["$x++", "++$x", "$x--"]);
    }

    #[test]
    fn test_backticks() {
        assert_all_rejected(&["`ls -la`"]);
    }

    #[test]
    fn test_newlines() {
        assert_all_rejected(&["1\nprint 'hacked'", "1\rprint 'hacked'"]);
    }

    #[test]
    fn test_regex_mutation() {
        assert_all_rejected(&["s/foo/bar/", "tr/a-z/A-Z/", "y/abc/xyz/"]);
    }

    #[test]
    fn test_escape_sequences_allowed() {
        // `\s` inside a match pattern is an escape sequence, not `s///`;
        // the validator must accept it.
        assert_all_accepted(&["/\\s+/"]);
    }

    #[test]
    fn test_single_quoted_strings() {
        // Operation names inside single quotes are literal text.
        assert_all_accepted(&["'print this'", "'system call'"]);
    }
}
413}